Coverage Report

Created: 2026-06-30 07:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/splines.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/splines.h"
7
8
#include <jxl/memory_manager.h>
9
10
#include <algorithm>
11
#include <cinttypes>  // PRIu64
12
#include <cmath>
13
#include <cstddef>
14
#include <cstdint>
15
#include <cstring>
16
#include <limits>
17
#include <utility>
18
#include <vector>
19
20
#include "lib/jxl/base/bits.h"
21
#include "lib/jxl/base/common.h"
22
#include "lib/jxl/base/compiler_specific.h"
23
#include "lib/jxl/base/printf_macros.h"
24
#include "lib/jxl/base/rect.h"
25
#include "lib/jxl/base/status.h"
26
#include "lib/jxl/chroma_from_luma.h"
27
#include "lib/jxl/common.h"  // JXL_HIGH_PRECISION
28
#include "lib/jxl/dct_scales.h"
29
#include "lib/jxl/dec_ans.h"
30
#include "lib/jxl/dec_bit_reader.h"
31
#include "lib/jxl/image.h"
32
#include "lib/jxl/memory_manager_internal.h"
33
#include "lib/jxl/pack_signed.h"
34
35
#undef HWY_TARGET_INCLUDE
36
#define HWY_TARGET_INCLUDE "lib/jxl/splines.cc"
37
#include <hwy/foreach_target.h>
38
#include <hwy/highway.h>
39
40
#include "lib/jxl/base/fast_math-inl.h"
41
HWY_BEFORE_NAMESPACE();
42
namespace jxl {
43
namespace HWY_NAMESPACE {
44
namespace {
45
46
// These templates are not found via ADL.
47
using hwy::HWY_NAMESPACE::Mul;
48
using hwy::HWY_NAMESPACE::MulAdd;
49
using hwy::HWY_NAMESPACE::MulSub;
50
using hwy::HWY_NAMESPACE::Sqrt;
51
using hwy::HWY_NAMESPACE::Sub;
52
53
// Given a set of DCT coefficients, this returns the result of performing cosine
54
// interpolation on the original samples.
55
57.5M
float ContinuousIDCT(const Dct32& dct, const float t) {
56
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
57
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
58
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
59
  // treatment of all the coefficients.
60
57.5M
  constexpr float kMultipliers[32] = {
61
57.5M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
62
57.5M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
63
57.5M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
64
57.5M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
65
57.5M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
66
57.5M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
67
57.5M
      kPi / 32 * 30, kPi / 32 * 31,
68
57.5M
  };
69
57.5M
  HWY_CAPPED(float, 32) df;
70
57.5M
  auto result = Zero(df);
71
57.5M
  const auto tandhalf = Set(df, t + 0.5f);
72
437M
  for (int i = 0; i < 32; i += Lanes(df)) {
73
379M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
74
379M
    auto cos = FastCosf(df, cos_arg);
75
379M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
76
379M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
77
379M
  }
78
57.5M
  return GetLane(SumOfLanes(df, result));
79
57.5M
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Line
Count
Source
55
20.1M
float ContinuousIDCT(const Dct32& dct, const float t) {
56
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
57
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
58
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
59
  // treatment of all the coefficients.
60
20.1M
  constexpr float kMultipliers[32] = {
61
20.1M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
62
20.1M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
63
20.1M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
64
20.1M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
65
20.1M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
66
20.1M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
67
20.1M
      kPi / 32 * 30, kPi / 32 * 31,
68
20.1M
  };
69
20.1M
  HWY_CAPPED(float, 32) df;
70
20.1M
  auto result = Zero(df);
71
20.1M
  const auto tandhalf = Set(df, t + 0.5f);
72
100M
  for (int i = 0; i < 32; i += Lanes(df)) {
73
80.7M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
74
80.7M
    auto cos = FastCosf(df, cos_arg);
75
80.7M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
76
80.7M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
77
80.7M
  }
78
20.1M
  return GetLane(SumOfLanes(df, result));
79
20.1M
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Line
Count
Source
55
20.7M
float ContinuousIDCT(const Dct32& dct, const float t) {
56
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
57
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
58
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
59
  // treatment of all the coefficients.
60
20.7M
  constexpr float kMultipliers[32] = {
61
20.7M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
62
20.7M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
63
20.7M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
64
20.7M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
65
20.7M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
66
20.7M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
67
20.7M
      kPi / 32 * 30, kPi / 32 * 31,
68
20.7M
  };
69
20.7M
  HWY_CAPPED(float, 32) df;
70
20.7M
  auto result = Zero(df);
71
20.7M
  const auto tandhalf = Set(df, t + 0.5f);
72
186M
  for (int i = 0; i < 32; i += Lanes(df)) {
73
165M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
74
165M
    auto cos = FastCosf(df, cos_arg);
75
165M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
76
165M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
77
165M
  }
78
20.7M
  return GetLane(SumOfLanes(df, result));
79
20.7M
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Line
Count
Source
55
16.6M
float ContinuousIDCT(const Dct32& dct, const float t) {
56
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
57
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
58
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
59
  // treatment of all the coefficients.
60
16.6M
  constexpr float kMultipliers[32] = {
61
16.6M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
62
16.6M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
63
16.6M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
64
16.6M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
65
16.6M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
66
16.6M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
67
16.6M
      kPi / 32 * 30, kPi / 32 * 31,
68
16.6M
  };
69
16.6M
  HWY_CAPPED(float, 32) df;
70
16.6M
  auto result = Zero(df);
71
16.6M
  const auto tandhalf = Set(df, t + 0.5f);
72
149M
  for (int i = 0; i < 32; i += Lanes(df)) {
73
133M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
74
133M
    auto cos = FastCosf(df, cos_arg);
75
133M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
76
133M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
77
133M
  }
78
16.6M
  return GetLane(SumOfLanes(df, result));
79
16.6M
}
80
81
template <typename DF>
82
void DrawSegment(DF df, const SplineSegment& segment, const bool add,
83
                 const size_t y, const size_t x, const size_t x0,
84
1.47M
                 float* JXL_RESTRICT rows[3]) {
85
1.47M
  Rebind<int32_t, DF> di;
86
1.47M
  const auto inv_sigma = Set(df, segment.inv_sigma);
87
1.47M
  const auto half = Set(df, 0.5f);
88
1.47M
  const auto one_over_2s2 = Set(df, 0.353553391f);
89
1.47M
  const auto sigma_over_4_times_intensity =
90
1.47M
      Set(df, segment.sigma_over_4_times_intensity);
91
1.47M
  const auto dx =
92
1.47M
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
93
1.47M
  const auto dy = Set(df, y - segment.center_y);
94
1.47M
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
95
1.47M
  const auto distance = Sqrt(sqd);
96
1.47M
  const auto one_dimensional_factor =
97
1.47M
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
98
1.47M
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
99
1.47M
  auto local_intensity =
100
1.47M
      Mul(sigma_over_4_times_intensity,
101
1.47M
          Mul(one_dimensional_factor, one_dimensional_factor));
102
5.89M
  for (size_t c = 0; c < 3; ++c) {
103
4.41M
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
104
4.41M
    const auto in = LoadU(df, rows[c] + x);
105
4.41M
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
106
4.41M
  }
107
1.47M
}
splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
84
76.9k
                 float* JXL_RESTRICT rows[3]) {
85
76.9k
  Rebind<int32_t, DF> di;
86
76.9k
  const auto inv_sigma = Set(df, segment.inv_sigma);
87
76.9k
  const auto half = Set(df, 0.5f);
88
76.9k
  const auto one_over_2s2 = Set(df, 0.353553391f);
89
76.9k
  const auto sigma_over_4_times_intensity =
90
76.9k
      Set(df, segment.sigma_over_4_times_intensity);
91
76.9k
  const auto dx =
92
76.9k
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
93
76.9k
  const auto dy = Set(df, y - segment.center_y);
94
76.9k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
95
76.9k
  const auto distance = Sqrt(sqd);
96
76.9k
  const auto one_dimensional_factor =
97
76.9k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
98
76.9k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
99
76.9k
  auto local_intensity =
100
76.9k
      Mul(sigma_over_4_times_intensity,
101
76.9k
          Mul(one_dimensional_factor, one_dimensional_factor));
102
307k
  for (size_t c = 0; c < 3; ++c) {
103
230k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
104
230k
    const auto in = LoadU(df, rows[c] + x);
105
230k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
106
230k
  }
107
76.9k
}
splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
84
151k
                 float* JXL_RESTRICT rows[3]) {
85
151k
  Rebind<int32_t, DF> di;
86
151k
  const auto inv_sigma = Set(df, segment.inv_sigma);
87
151k
  const auto half = Set(df, 0.5f);
88
151k
  const auto one_over_2s2 = Set(df, 0.353553391f);
89
151k
  const auto sigma_over_4_times_intensity =
90
151k
      Set(df, segment.sigma_over_4_times_intensity);
91
151k
  const auto dx =
92
151k
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
93
151k
  const auto dy = Set(df, y - segment.center_y);
94
151k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
95
151k
  const auto distance = Sqrt(sqd);
96
151k
  const auto one_dimensional_factor =
97
151k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
98
151k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
99
151k
  auto local_intensity =
100
151k
      Mul(sigma_over_4_times_intensity,
101
151k
          Mul(one_dimensional_factor, one_dimensional_factor));
102
607k
  for (size_t c = 0; c < 3; ++c) {
103
455k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
104
455k
    const auto in = LoadU(df, rows[c] + x);
105
455k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
106
455k
  }
107
151k
}
splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
84
195k
                 float* JXL_RESTRICT rows[3]) {
85
195k
  Rebind<int32_t, DF> di;
86
195k
  const auto inv_sigma = Set(df, segment.inv_sigma);
87
195k
  const auto half = Set(df, 0.5f);
88
195k
  const auto one_over_2s2 = Set(df, 0.353553391f);
89
195k
  const auto sigma_over_4_times_intensity =
90
195k
      Set(df, segment.sigma_over_4_times_intensity);
91
195k
  const auto dx =
92
195k
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
93
195k
  const auto dy = Set(df, y - segment.center_y);
94
195k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
95
195k
  const auto distance = Sqrt(sqd);
96
195k
  const auto one_dimensional_factor =
97
195k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
98
195k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
99
195k
  auto local_intensity =
100
195k
      Mul(sigma_over_4_times_intensity,
101
195k
          Mul(one_dimensional_factor, one_dimensional_factor));
102
781k
  for (size_t c = 0; c < 3; ++c) {
103
586k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
104
586k
    const auto in = LoadU(df, rows[c] + x);
105
586k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
106
586k
  }
107
195k
}
splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
84
76.8k
                 float* JXL_RESTRICT rows[3]) {
85
76.8k
  Rebind<int32_t, DF> di;
86
76.8k
  const auto inv_sigma = Set(df, segment.inv_sigma);
87
76.8k
  const auto half = Set(df, 0.5f);
88
76.8k
  const auto one_over_2s2 = Set(df, 0.353553391f);
89
76.8k
  const auto sigma_over_4_times_intensity =
90
76.8k
      Set(df, segment.sigma_over_4_times_intensity);
91
76.8k
  const auto dx =
92
76.8k
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
93
76.8k
  const auto dy = Set(df, y - segment.center_y);
94
76.8k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
95
76.8k
  const auto distance = Sqrt(sqd);
96
76.8k
  const auto one_dimensional_factor =
97
76.8k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
98
76.8k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
99
76.8k
  auto local_intensity =
100
76.8k
      Mul(sigma_over_4_times_intensity,
101
76.8k
          Mul(one_dimensional_factor, one_dimensional_factor));
102
307k
  for (size_t c = 0; c < 3; ++c) {
103
230k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
104
230k
    const auto in = LoadU(df, rows[c] + x);
105
230k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
106
230k
  }
107
76.8k
}
splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
84
783k
                 float* JXL_RESTRICT rows[3]) {
85
783k
  Rebind<int32_t, DF> di;
86
783k
  const auto inv_sigma = Set(df, segment.inv_sigma);
87
783k
  const auto half = Set(df, 0.5f);
88
783k
  const auto one_over_2s2 = Set(df, 0.353553391f);
89
783k
  const auto sigma_over_4_times_intensity =
90
783k
      Set(df, segment.sigma_over_4_times_intensity);
91
783k
  const auto dx =
92
783k
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
93
783k
  const auto dy = Set(df, y - segment.center_y);
94
783k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
95
783k
  const auto distance = Sqrt(sqd);
96
783k
  const auto one_dimensional_factor =
97
783k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
98
783k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
99
783k
  auto local_intensity =
100
783k
      Mul(sigma_over_4_times_intensity,
101
783k
          Mul(one_dimensional_factor, one_dimensional_factor));
102
3.13M
  for (size_t c = 0; c < 3; ++c) {
103
2.34M
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
104
2.34M
    const auto in = LoadU(df, rows[c] + x);
105
2.34M
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
106
2.34M
  }
107
783k
}
splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
84
188k
                 float* JXL_RESTRICT rows[3]) {
85
188k
  Rebind<int32_t, DF> di;
86
188k
  const auto inv_sigma = Set(df, segment.inv_sigma);
87
188k
  const auto half = Set(df, 0.5f);
88
188k
  const auto one_over_2s2 = Set(df, 0.353553391f);
89
188k
  const auto sigma_over_4_times_intensity =
90
188k
      Set(df, segment.sigma_over_4_times_intensity);
91
188k
  const auto dx =
92
188k
      Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x));
93
188k
  const auto dy = Set(df, y - segment.center_y);
94
188k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
95
188k
  const auto distance = Sqrt(sqd);
96
188k
  const auto one_dimensional_factor =
97
188k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
98
188k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
99
188k
  auto local_intensity =
100
188k
      Mul(sigma_over_4_times_intensity,
101
188k
          Mul(one_dimensional_factor, one_dimensional_factor));
102
753k
  for (size_t c = 0; c < 3; ++c) {
103
565k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
104
565k
    const auto in = LoadU(df, rows[c] + x);
105
565k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
106
565k
  }
107
188k
}
108
109
void DrawSegment(const SplineSegment& segment, const bool add, const size_t y,
110
                 const size_t x0, const size_t x1,
111
43.7M
                 float* JXL_RESTRICT rows[3]) {
112
43.7M
  ptrdiff_t start = std::llround(segment.center_x - segment.maximum_distance);
113
43.7M
  ptrdiff_t end = std::llround(segment.center_x + segment.maximum_distance);
114
43.7M
  if (end < static_cast<ptrdiff_t>(x0) || start >= static_cast<ptrdiff_t>(x1)) {
115
43.5M
    return;  // span does not intersect scan
116
43.5M
  }
117
217k
  size_t span_x0 = std::max<ptrdiff_t>(x0, start) - x0;
118
217k
  size_t span_x1 = std::min<ptrdiff_t>(x1, end + 1) - x0;  // exclusive
119
217k
  HWY_FULL(float) df;
120
217k
  size_t x = span_x0;
121
1.27M
  for (; x + Lanes(df) <= span_x1; x += Lanes(df)) {
122
1.05M
    DrawSegment(df, segment, add, y, x, x0, rows);
123
1.05M
  }
124
634k
  for (; x < span_x1; ++x) {
125
417k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows);
126
417k
  }
127
217k
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
111
6.59M
                 float* JXL_RESTRICT rows[3]) {
112
6.59M
  ptrdiff_t start = std::llround(segment.center_x - segment.maximum_distance);
113
6.59M
  ptrdiff_t end = std::llround(segment.center_x + segment.maximum_distance);
114
6.59M
  if (end < static_cast<ptrdiff_t>(x0) || start >= static_cast<ptrdiff_t>(x1)) {
115
6.55M
    return;  // span does not intersect scan
116
6.55M
  }
117
40.8k
  size_t span_x0 = std::max<ptrdiff_t>(x0, start) - x0;
118
40.8k
  size_t span_x1 = std::min<ptrdiff_t>(x1, end + 1) - x0;  // exclusive
119
40.8k
  HWY_FULL(float) df;
120
40.8k
  size_t x = span_x0;
121
117k
  for (; x + Lanes(df) <= span_x1; x += Lanes(df)) {
122
76.9k
    DrawSegment(df, segment, add, y, x, x0, rows);
123
76.9k
  }
124
192k
  for (; x < span_x1; ++x) {
125
151k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows);
126
151k
  }
127
40.8k
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
111
21.1M
                 float* JXL_RESTRICT rows[3]) {
112
21.1M
  ptrdiff_t start = std::llround(segment.center_x - segment.maximum_distance);
113
21.1M
  ptrdiff_t end = std::llround(segment.center_x + segment.maximum_distance);
114
21.1M
  if (end < static_cast<ptrdiff_t>(x0) || start >= static_cast<ptrdiff_t>(x1)) {
115
21.1M
    return;  // span does not intersect scan
116
21.1M
  }
117
52.2k
  size_t span_x0 = std::max<ptrdiff_t>(x0, start) - x0;
118
52.2k
  size_t span_x1 = std::min<ptrdiff_t>(x1, end + 1) - x0;  // exclusive
119
52.2k
  HWY_FULL(float) df;
120
52.2k
  size_t x = span_x0;
121
247k
  for (; x + Lanes(df) <= span_x1; x += Lanes(df)) {
122
195k
    DrawSegment(df, segment, add, y, x, x0, rows);
123
195k
  }
124
129k
  for (; x < span_x1; ++x) {
125
76.8k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows);
126
76.8k
  }
127
52.2k
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
111
16.0M
                 float* JXL_RESTRICT rows[3]) {
112
16.0M
  ptrdiff_t start = std::llround(segment.center_x - segment.maximum_distance);
113
16.0M
  ptrdiff_t end = std::llround(segment.center_x + segment.maximum_distance);
114
16.0M
  if (end < static_cast<ptrdiff_t>(x0) || start >= static_cast<ptrdiff_t>(x1)) {
115
15.8M
    return;  // span does not intersect scan
116
15.8M
  }
117
124k
  size_t span_x0 = std::max<ptrdiff_t>(x0, start) - x0;
118
124k
  size_t span_x1 = std::min<ptrdiff_t>(x1, end + 1) - x0;  // exclusive
119
124k
  HWY_FULL(float) df;
120
124k
  size_t x = span_x0;
121
907k
  for (; x + Lanes(df) <= span_x1; x += Lanes(df)) {
122
783k
    DrawSegment(df, segment, add, y, x, x0, rows);
123
783k
  }
124
312k
  for (; x < span_x1; ++x) {
125
188k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows);
126
188k
  }
127
124k
}
128
129
void ComputeSegments(size_t image_ysize, const Spline::Point& center,
130
                     const float intensity, const float color[3],
131
                     const float sigma, std::vector<SplineSegment>& segments,
132
14.3M
                     std::vector<SplineSegmentSpan>& segment_spans) {
133
  // Sanity check sigma, inverse sigma and intensity
134
14.3M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
135
14.3M
        std::isfinite(intensity))) {
136
1.78k
    return;
137
1.78k
  }
138
14.3M
#if JXL_HIGH_PRECISION
139
14.3M
  constexpr float kDistanceExp = 5;
140
#else
141
  // About 30% faster.
142
  constexpr float kDistanceExp = 3;
143
#endif
144
  // We cap from below colors to at least 0.01.
145
14.3M
  float max_color = 0.01f;
146
57.5M
  for (size_t c = 0; c < 3; c++) {
147
43.1M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
148
43.1M
  }
149
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
150
  // below 10^-kDistanceExp.
151
14.3M
  const float maximum_distance =
152
14.3M
      std::sqrt(-2.0f * sigma * sigma *
153
14.3M
                (std::log(0.1f) * kDistanceExp - std::log(max_color)));
154
155
14.3M
  ptrdiff_t y0 = std::llround(center.y - maximum_distance);
156
14.3M
  y0 = std::max<ptrdiff_t>(y0, 0);
157
14.3M
  ptrdiff_t y1 = std::llround(center.y + maximum_distance) + 1;
158
14.3M
  y1 = std::min<ptrdiff_t>(y1, image_ysize);
159
14.3M
  if (y1 <= y0) return;
160
161
6.26M
  SplineSegment segment;
162
6.26M
  segment.center_y = center.y;
163
6.26M
  segment.center_x = center.x;
164
6.26M
  memcpy(segment.color, color, sizeof(segment.color));
165
6.26M
  segment.inv_sigma = 1.0f / sigma;
166
6.26M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
167
6.26M
  segment.maximum_distance = maximum_distance;
168
6.26M
  segments.emplace_back(segment);
169
170
6.26M
  segment_spans.emplace_back(y0, y1);
171
6.26M
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::ComputeSegments(unsigned long, jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&)
Line
Count
Source
132
5.04M
                     std::vector<SplineSegmentSpan>& segment_spans) {
133
  // Sanity check sigma, inverse sigma and intensity
134
5.04M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
135
5.04M
        std::isfinite(intensity))) {
136
441
    return;
137
441
  }
138
5.04M
#if JXL_HIGH_PRECISION
139
5.04M
  constexpr float kDistanceExp = 5;
140
#else
141
  // About 30% faster.
142
  constexpr float kDistanceExp = 3;
143
#endif
144
  // We cap from below colors to at least 0.01.
145
5.04M
  float max_color = 0.01f;
146
20.1M
  for (size_t c = 0; c < 3; c++) {
147
15.1M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
148
15.1M
  }
149
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
150
  // below 10^-kDistanceExp.
151
5.04M
  const float maximum_distance =
152
5.04M
      std::sqrt(-2.0f * sigma * sigma *
153
5.04M
                (std::log(0.1f) * kDistanceExp - std::log(max_color)));
154
155
5.04M
  ptrdiff_t y0 = std::llround(center.y - maximum_distance);
156
5.04M
  y0 = std::max<ptrdiff_t>(y0, 0);
157
5.04M
  ptrdiff_t y1 = std::llround(center.y + maximum_distance) + 1;
158
5.04M
  y1 = std::min<ptrdiff_t>(y1, image_ysize);
159
5.04M
  if (y1 <= y0) return;
160
161
1.08M
  SplineSegment segment;
162
1.08M
  segment.center_y = center.y;
163
1.08M
  segment.center_x = center.x;
164
1.08M
  memcpy(segment.color, color, sizeof(segment.color));
165
1.08M
  segment.inv_sigma = 1.0f / sigma;
166
1.08M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
167
1.08M
  segment.maximum_distance = maximum_distance;
168
1.08M
  segments.emplace_back(segment);
169
170
1.08M
  segment_spans.emplace_back(y0, y1);
171
1.08M
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::ComputeSegments(unsigned long, jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&)
Line
Count
Source
132
5.18M
                     std::vector<SplineSegmentSpan>& segment_spans) {
133
  // Sanity check sigma, inverse sigma and intensity
134
5.18M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
135
5.18M
        std::isfinite(intensity))) {
136
521
    return;
137
521
  }
138
5.18M
#if JXL_HIGH_PRECISION
139
5.18M
  constexpr float kDistanceExp = 5;
140
#else
141
  // About 30% faster.
142
  constexpr float kDistanceExp = 3;
143
#endif
144
  // We cap from below colors to at least 0.01.
145
5.18M
  float max_color = 0.01f;
146
20.7M
  for (size_t c = 0; c < 3; c++) {
147
15.5M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
148
15.5M
  }
149
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
150
  // below 10^-kDistanceExp.
151
5.18M
  const float maximum_distance =
152
5.18M
      std::sqrt(-2.0f * sigma * sigma *
153
5.18M
                (std::log(0.1f) * kDistanceExp - std::log(max_color)));
154
155
5.18M
  ptrdiff_t y0 = std::llround(center.y - maximum_distance);
156
5.18M
  y0 = std::max<ptrdiff_t>(y0, 0);
157
5.18M
  ptrdiff_t y1 = std::llround(center.y + maximum_distance) + 1;
158
5.18M
  y1 = std::min<ptrdiff_t>(y1, image_ysize);
159
5.18M
  if (y1 <= y0) return;
160
161
3.10M
  SplineSegment segment;
162
3.10M
  segment.center_y = center.y;
163
3.10M
  segment.center_x = center.x;
164
3.10M
  memcpy(segment.color, color, sizeof(segment.color));
165
3.10M
  segment.inv_sigma = 1.0f / sigma;
166
3.10M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
167
3.10M
  segment.maximum_distance = maximum_distance;
168
3.10M
  segments.emplace_back(segment);
169
170
3.10M
  segment_spans.emplace_back(y0, y1);
171
3.10M
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::ComputeSegments(unsigned long, jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&)
Line
Count
Source
132
4.16M
                     std::vector<SplineSegmentSpan>& segment_spans) {
133
  // Sanity check sigma, inverse sigma and intensity
134
4.16M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
135
4.15M
        std::isfinite(intensity))) {
136
819
    return;
137
819
  }
138
4.15M
#if JXL_HIGH_PRECISION
139
4.15M
  constexpr float kDistanceExp = 5;
140
#else
141
  // About 30% faster.
142
  constexpr float kDistanceExp = 3;
143
#endif
144
  // We cap from below colors to at least 0.01.
145
4.15M
  float max_color = 0.01f;
146
16.6M
  for (size_t c = 0; c < 3; c++) {
147
12.4M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
148
12.4M
  }
149
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
150
  // below 10^-kDistanceExp.
151
4.15M
  const float maximum_distance =
152
4.15M
      std::sqrt(-2.0f * sigma * sigma *
153
4.15M
                (std::log(0.1f) * kDistanceExp - std::log(max_color)));
154
155
4.15M
  ptrdiff_t y0 = std::llround(center.y - maximum_distance);
156
4.15M
  y0 = std::max<ptrdiff_t>(y0, 0);
157
4.15M
  ptrdiff_t y1 = std::llround(center.y + maximum_distance) + 1;
158
4.15M
  y1 = std::min<ptrdiff_t>(y1, image_ysize);
159
4.15M
  if (y1 <= y0) return;
160
161
2.07M
  SplineSegment segment;
162
2.07M
  segment.center_y = center.y;
163
2.07M
  segment.center_x = center.x;
164
2.07M
  memcpy(segment.color, color, sizeof(segment.color));
165
2.07M
  segment.inv_sigma = 1.0f / sigma;
166
2.07M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
167
2.07M
  segment.maximum_distance = maximum_distance;
168
2.07M
  segments.emplace_back(segment);
169
170
2.07M
  segment_spans.emplace_back(y0, y1);
171
2.07M
}
172
173
void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
174
                  float* JXL_RESTRICT row_b, size_t y, size_t x0, size_t x1,
175
                  const bool add, const SplineSegment* segments,
176
                  const size_t* segment_indices,
177
19.8k
                  const ptrdiff_t* segment_y_start) {
178
19.8k
  float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b};
179
43.8M
  for (ptrdiff_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
180
43.7M
    DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows);
181
43.7M
  }
182
19.8k
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, long const*)
Line
Count
Source
177
6.11k
                  const ptrdiff_t* segment_y_start) {
178
6.11k
  float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b};
179
6.59M
  for (ptrdiff_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
180
6.59M
    DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows);
181
6.59M
  }
182
6.11k
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, long const*)
Line
Count
Source
177
4.51k
                  const ptrdiff_t* segment_y_start) {
178
4.51k
  float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b};
179
21.1M
  for (ptrdiff_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
180
21.1M
    DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows);
181
21.1M
  }
182
4.51k
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, long const*)
Line
Count
Source
177
9.23k
                  const ptrdiff_t* segment_y_start) {
178
9.23k
  float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b};
179
16.0M
  for (ptrdiff_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
180
16.0M
    DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows);
181
16.0M
  }
182
9.23k
}
183
184
void SegmentsFromPoints(
185
    size_t image_ysize, const Spline& spline,
186
    const std::vector<std::pair<Spline::Point, float>>& points_to_draw,
187
    const float arc_length, std::vector<SplineSegment>& segments,
188
20.0k
    std::vector<SplineSegmentSpan>& segments_spans) {
189
20.0k
  const float inv_arc_length = 1.0f / arc_length;
190
20.0k
  int k = 0;
191
14.3M
  for (const auto& point_to_draw : points_to_draw) {
192
14.3M
    const Spline::Point& point = point_to_draw.first;
193
14.3M
    const float multiplier = point_to_draw.second;
194
14.3M
    const float progress_along_arc =
195
14.3M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
196
14.3M
    ++k;
197
14.3M
    float color[3];
198
57.5M
    for (size_t c = 0; c < 3; ++c) {
199
43.1M
      color[c] =
200
43.1M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
201
43.1M
    }
202
14.3M
    const float sigma =
203
14.3M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
204
14.3M
    ComputeSegments(image_ysize, point, multiplier, color, sigma, segments,
205
14.3M
                    segments_spans);
206
14.3M
  }
207
20.0k
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::SegmentsFromPoints(unsigned long, jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&)
Line
Count
Source
188
10.7k
    std::vector<SplineSegmentSpan>& segments_spans) {
189
10.7k
  const float inv_arc_length = 1.0f / arc_length;
190
10.7k
  int k = 0;
191
5.04M
  for (const auto& point_to_draw : points_to_draw) {
192
5.04M
    const Spline::Point& point = point_to_draw.first;
193
5.04M
    const float multiplier = point_to_draw.second;
194
5.04M
    const float progress_along_arc =
195
5.04M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
196
5.04M
    ++k;
197
5.04M
    float color[3];
198
20.1M
    for (size_t c = 0; c < 3; ++c) {
199
15.1M
      color[c] =
200
15.1M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
201
15.1M
    }
202
5.04M
    const float sigma =
203
5.04M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
204
5.04M
    ComputeSegments(image_ysize, point, multiplier, color, sigma, segments,
205
5.04M
                    segments_spans);
206
5.04M
  }
207
10.7k
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::SegmentsFromPoints(unsigned long, jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&)
Line
Count
Source
188
2.14k
    std::vector<SplineSegmentSpan>& segments_spans) {
189
2.14k
  const float inv_arc_length = 1.0f / arc_length;
190
2.14k
  int k = 0;
191
5.18M
  for (const auto& point_to_draw : points_to_draw) {
192
5.18M
    const Spline::Point& point = point_to_draw.first;
193
5.18M
    const float multiplier = point_to_draw.second;
194
5.18M
    const float progress_along_arc =
195
5.18M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
196
5.18M
    ++k;
197
5.18M
    float color[3];
198
20.7M
    for (size_t c = 0; c < 3; ++c) {
199
15.5M
      color[c] =
200
15.5M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
201
15.5M
    }
202
5.18M
    const float sigma =
203
5.18M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
204
5.18M
    ComputeSegments(image_ysize, point, multiplier, color, sigma, segments,
205
5.18M
                    segments_spans);
206
5.18M
  }
207
2.14k
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::SegmentsFromPoints(unsigned long, jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&)
Line
Count
Source
188
7.20k
    std::vector<SplineSegmentSpan>& segments_spans) {
189
7.20k
  const float inv_arc_length = 1.0f / arc_length;
190
7.20k
  int k = 0;
191
4.16M
  for (const auto& point_to_draw : points_to_draw) {
192
4.16M
    const Spline::Point& point = point_to_draw.first;
193
4.16M
    const float multiplier = point_to_draw.second;
194
4.16M
    const float progress_along_arc =
195
4.16M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
196
4.16M
    ++k;
197
4.16M
    float color[3];
198
16.6M
    for (size_t c = 0; c < 3; ++c) {
199
12.4M
      color[c] =
200
12.4M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
201
12.4M
    }
202
4.16M
    const float sigma =
203
4.16M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
204
4.16M
    ComputeSegments(image_ysize, point, multiplier, color, sigma, segments,
205
4.16M
                    segments_spans);
206
4.16M
  }
207
7.20k
}
208
}  // namespace
209
// NOLINTNEXTLINE(google-readability-namespace-comments)
210
}  // namespace HWY_NAMESPACE
211
}  // namespace jxl
212
HWY_AFTER_NAMESPACE();
213
214
#if HWY_ONCE
215
namespace jxl {
216
HWY_EXPORT(SegmentsFromPoints);
217
HWY_EXPORT(DrawSegments);
218
219
namespace {
220
221
// It is not in spec, but reasonable limit to avoid overflows.
222
template <typename T>
223
301k
Status ValidateSplinePointPos(const T& x, const T& y) {
224
301k
  constexpr T kSplinePosLimit = 1u << 23;
225
301k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
226
301k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
227
96
    return JXL_FAILURE("Spline coordinates out of bounds");
228
96
  }
229
301k
  return true;
230
301k
}
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<long>(long const&, long const&)
Line
Count
Source
223
101k
Status ValidateSplinePointPos(const T& x, const T& y) {
224
101k
  constexpr T kSplinePosLimit = 1u << 23;
225
101k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
226
101k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
227
61
    return JXL_FAILURE("Spline coordinates out of bounds");
228
61
  }
229
101k
  return true;
230
101k
}
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<float>(float const&, float const&)
Line
Count
Source
223
37.8k
Status ValidateSplinePointPos(const T& x, const T& y) {
224
37.8k
  constexpr T kSplinePosLimit = 1u << 23;
225
37.8k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
226
37.8k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
227
0
    return JXL_FAILURE("Spline coordinates out of bounds");
228
0
  }
229
37.8k
  return true;
230
37.8k
}
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<int>(int const&, int const&)
Line
Count
Source
223
162k
Status ValidateSplinePointPos(const T& x, const T& y) {
224
162k
  constexpr T kSplinePosLimit = 1u << 23;
225
162k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
226
162k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
227
35
    return JXL_FAILURE("Spline coordinates out of bounds");
228
35
  }
229
162k
  return true;
230
162k
}
231
232
// Maximum number of spline control points per frame is
233
//   std::min(kMaxNumControlPoints, xsize * ysize / 2)
234
constexpr size_t kMaxNumControlPoints = 1u << 20u;
235
constexpr size_t kMaxNumControlPointsPerPixelRatio = 2;
236
237
0
float AdjustedQuant(const int32_t adjustment) {
238
0
  return (adjustment >= 0) ? (1.f + .125f * adjustment)
239
0
                           : 1.f / (1.f - .125f * adjustment);
240
0
}
241
242
37.7k
float InvAdjustedQuant(const int32_t adjustment) {
243
37.7k
  return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment)
244
37.7k
                           : (1.f - .125f * adjustment);
245
37.7k
}
246
247
// X, Y, B, sigma.
248
constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f};
249
250
Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points,
251
                               BitReader* const br, ANSSymbolReader* reader,
252
                               const std::vector<uint8_t>& context_map,
253
2.32k
                               const size_t num_splines) {
254
2.32k
  points->clear();
255
2.32k
  points->reserve(num_splines);
256
2.32k
  int64_t last_x = 0;
257
2.32k
  int64_t last_y = 0;
258
103k
  for (size_t i = 0; i < num_splines; i++) {
259
101k
    size_t dx =
260
101k
        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
261
101k
    size_t dy =
262
101k
        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
263
101k
    int64_t x;
264
101k
    int64_t y;
265
101k
    if (i != 0) {
266
99.2k
      x = UnpackSigned(dx) + last_x;
267
99.2k
      y = UnpackSigned(dy) + last_y;
268
99.2k
    } else {
269
2.32k
      x = dx;
270
2.32k
      y = dy;
271
2.32k
    }
272
101k
    JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y));
273
101k
    points->emplace_back(static_cast<float>(x), static_cast<float>(y));
274
101k
    last_x = x;
275
101k
    last_y = y;
276
101k
  }
277
2.25k
  return true;
278
2.32k
}
279
280
struct Vector {
281
  float x, y;
282
0
  Vector operator-() const { return {-x, -y}; }
283
0
  Vector operator+(const Vector& other) const {
284
0
    return {x + other.x, y + other.y};
285
0
  }
286
15.1M
  float SquaredNorm() const { return x * x + y * y; }
287
};
288
18.8M
Vector operator*(const float k, const Vector& vec) {
289
18.8M
  return {k * vec.x, k * vec.y};
290
18.8M
}
291
292
18.8M
Spline::Point operator+(const Spline::Point& p, const Vector& vec) {
293
18.8M
  return {p.x + vec.x, p.y + vec.y};
294
18.8M
}
295
34.0M
Vector operator-(const Spline::Point& a, const Spline::Point& b) {
296
34.0M
  return {a.x - b.x, a.y - b.y};
297
34.0M
}
298
299
// TODO(eustas): avoid making a copy of "points".
300
void DrawCentripetalCatmullRomSpline(std::vector<Spline::Point> points,
301
36.1k
                                     std::vector<Spline::Point>& result) {
302
36.1k
  if (points.empty()) return;
303
36.1k
  if (points.size() == 1) {
304
16.1k
    result.push_back(points[0]);
305
16.1k
    return;
306
16.1k
  }
307
  // Number of points to compute between each control point.
308
20.0k
  static constexpr int kNumPoints = 16;
309
20.0k
  result.reserve((points.size() - 1) * kNumPoints + 1);
310
20.0k
  points.insert(points.begin(), points[0] + (points[0] - points[1]));
311
20.0k
  points.push_back(points[points.size() - 1] +
312
20.0k
                   (points[points.size() - 1] - points[points.size() - 2]));
313
  // points has at least 4 elements at this point.
314
69.9k
  for (size_t start = 0; start < points.size() - 3; ++start) {
315
    // 4 of them are used, and we draw from p[1] to p[2].
316
49.9k
    const Spline::Point* const p = &points[start];
317
49.9k
    result.push_back(p[1]);
318
49.9k
    float d[3];
319
49.9k
    float t[4];
320
49.9k
    t[0] = 0;
321
199k
    for (int k = 0; k < 3; ++k) {
322
      // TODO(eustas): for each segment delta is calculated 3 times...
323
      // TODO(eustas): restrict d[k] with reasonable limit and spec it.
324
149k
      d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y));
325
149k
      t[k + 1] = t[k] + d[k];
326
149k
    }
327
799k
    for (int i = 1; i < kNumPoints; ++i) {
328
749k
      const float tt = d[0] + (static_cast<float>(i) / kNumPoints) * d[1];
329
749k
      Spline::Point a[3];
330
2.99M
      for (int k = 0; k < 3; ++k) {
331
        // TODO(eustas): reciprocal multiplication would be faster.
332
2.24M
        a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]);
333
2.24M
      }
334
749k
      Spline::Point b[2];
335
2.24M
      for (int k = 0; k < 2; ++k) {
336
1.49M
        b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]);
337
1.49M
      }
338
749k
      result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0]));
339
749k
    }
340
49.9k
  }
341
20.0k
  result.push_back(points[points.size() - 2]);
342
20.0k
}
343
344
// Move along the line segments defined by `points`, `kDesiredRenderingDistance`
345
// pixels at a time, and call `functor` with each point and the actual distance
346
// to the previous point (which will always be kDesiredRenderingDistance except
347
// possibly for the very last point).
348
// TODO(eustas): this method always adds the last point, but never the first
349
//               (unless those are one); I believe both ends matter.
350
template <typename Points, typename Functor>
351
36.1k
Status ForEachEquallySpacedPoint(const Points& points, const Functor& functor) {
352
36.1k
  JXL_ENSURE(!points.empty());
353
36.1k
  Spline::Point current = points.front();
354
36.1k
  functor(current, kDesiredRenderingDistance);
355
36.1k
  auto next = points.begin();
356
14.3M
  while (next != points.end()) {
357
14.3M
    const Spline::Point* previous = &current;
358
14.3M
    float arclength_from_previous = 0.f;
359
15.2M
    for (;;) {
360
15.2M
      if (next == points.end()) {
361
36.1k
        functor(*previous, arclength_from_previous);
362
36.1k
        return true;
363
36.1k
      }
364
15.1M
      const float arclength_to_next =
365
15.1M
          std::sqrt((*next - *previous).SquaredNorm());
366
15.1M
      if (arclength_from_previous + arclength_to_next >=
367
15.1M
          kDesiredRenderingDistance) {
368
14.3M
        current =
369
14.3M
            *previous + ((kDesiredRenderingDistance - arclength_from_previous) /
370
14.3M
                         arclength_to_next) *
371
14.3M
                            (*next - *previous);
372
14.3M
        functor(current, kDesiredRenderingDistance);
373
14.3M
        break;
374
14.3M
      }
375
835k
      arclength_from_previous += arclength_to_next;
376
835k
      previous = &*next;
377
835k
      ++next;
378
835k
    }
379
14.3M
  }
380
0
  return true;
381
36.1k
}
382
383
}  // namespace
384
385
StatusOr<QuantizedSpline> QuantizedSpline::Create(
386
    const Spline& original, const int32_t quantization_adjustment,
387
0
    const float y_to_x, const float y_to_b) {
388
0
  JXL_ENSURE(!original.control_points.empty());
389
0
  QuantizedSpline result;
390
0
  result.control_points_.reserve(original.control_points.size() - 1);
391
0
  const Spline::Point& starting_point = original.control_points.front();
392
0
  int previous_x = static_cast<int>(std::round(starting_point.x));
393
0
  int previous_y = static_cast<int>(std::round(starting_point.y));
394
0
  int previous_delta_x = 0;
395
0
  int previous_delta_y = 0;
396
0
  for (auto it = original.control_points.begin() + 1;
397
0
       it != original.control_points.end(); ++it) {
398
0
    const int new_x = static_cast<int>(std::round(it->x));
399
0
    const int new_y = static_cast<int>(std::round(it->y));
400
0
    const int new_delta_x = new_x - previous_x;
401
0
    const int new_delta_y = new_y - previous_y;
402
0
    result.control_points_.emplace_back(new_delta_x - previous_delta_x,
403
0
                                        new_delta_y - previous_delta_y);
404
0
    previous_delta_x = new_delta_x;
405
0
    previous_delta_y = new_delta_y;
406
0
    previous_x = new_x;
407
0
    previous_y = new_y;
408
0
  }
409
410
0
  const auto to_int = [](float v) -> int {
411
    // Maximal int representable with float.
412
0
    constexpr float kMax = std::numeric_limits<int>::max() - 127;
413
0
    constexpr float kMin = -kMax;
414
0
    return static_cast<int>(std::round(Clamp1(v, kMin, kMax)));
415
0
  };
416
417
0
  const auto quant = AdjustedQuant(quantization_adjustment);
418
0
  const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
419
0
  for (int c : {1, 0, 2}) {
420
0
    float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b;
421
0
    for (int i = 0; i < 32; ++i) {
422
0
      const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
423
0
      const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
424
0
      auto restored_y = result.color_dct_[1][i] * inv_dct_factor *
425
0
                        kChannelWeight[1] * inv_quant;
426
0
      auto decorrelated = original.color_dct[c][i] - factor * restored_y;
427
0
      result.color_dct_[c][i] =
428
0
          to_int(decorrelated * dct_factor * quant / kChannelWeight[c]);
429
0
    }
430
0
  }
431
0
  for (int i = 0; i < 32; ++i) {
432
0
    const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
433
0
    result.sigma_dct_[i] =
434
0
        to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]);
435
0
  }
436
0
  return result;
437
0
}
438
439
Status QuantizedSpline::Dequantize(const Spline::Point& starting_point,
440
                                   const int32_t quantization_adjustment,
441
                                   const float y_to_x, const float y_to_b,
442
                                   const uint64_t image_size,
443
                                   uint64_t* total_estimated_area_reached,
444
37.8k
                                   Spline& result) const {
445
37.8k
  constexpr uint64_t kOne = static_cast<uint64_t>(1);
446
37.8k
  const uint64_t area_limit =
447
37.8k
      std::min(1024 * image_size + (kOne << 32), kOne << 42);
448
449
37.8k
  result.control_points.clear();
450
37.8k
  result.control_points.reserve(control_points_.size() + 1);
451
37.8k
  float px = std::round(starting_point.x);
452
37.8k
  float py = std::round(starting_point.y);
453
37.8k
  JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py));
454
37.8k
  int current_x = static_cast<int>(px);
455
37.8k
  int current_y = static_cast<int>(py);
456
37.8k
  result.control_points.emplace_back(static_cast<float>(current_x),
457
37.8k
                                     static_cast<float>(current_y));
458
37.8k
  int current_delta_x = 0;
459
37.8k
  int current_delta_y = 0;
460
37.8k
  uint64_t manhattan_distance = 0;
461
81.1k
  for (const auto& point : control_points_) {
462
81.1k
    current_delta_x += point.first;
463
81.1k
    current_delta_y += point.second;
464
81.1k
    manhattan_distance += std::abs(current_delta_x) + std::abs(current_delta_y);
465
81.1k
    if (manhattan_distance > area_limit) {
466
0
      return JXL_FAILURE("Too large manhattan_distance reached: %" PRIu64,
467
0
                         manhattan_distance);
468
0
    }
469
81.1k
    JXL_RETURN_IF_ERROR(
470
81.1k
        ValidateSplinePointPos(current_delta_x, current_delta_y));
471
81.0k
    current_x += current_delta_x;
472
81.0k
    current_y += current_delta_y;
473
81.0k
    JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y));
474
81.0k
    result.control_points.emplace_back(static_cast<float>(current_x),
475
81.0k
                                       static_cast<float>(current_y));
476
81.0k
  }
477
478
37.7k
  const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
479
151k
  for (int c = 0; c < 3; ++c) {
480
3.74M
    for (int i = 0; i < 32; ++i) {
481
3.62M
      const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
482
3.62M
      result.color_dct[c][i] =
483
3.62M
          color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant;
484
3.62M
    }
485
113k
  }
486
1.24M
  for (int i = 0; i < 32; ++i) {
487
1.20M
    result.color_dct[0][i] += y_to_x * result.color_dct[1][i];
488
1.20M
    result.color_dct[2][i] += y_to_b * result.color_dct[1][i];
489
1.20M
  }
490
37.7k
  uint64_t width_estimate = 0;
491
492
37.7k
  uint64_t color[3] = {};
493
151k
  for (int c = 0; c < 3; ++c) {
494
3.74M
    for (int i = 0; i < 32; ++i) {
495
3.62M
      color[c] += static_cast<uint64_t>(
496
3.62M
          std::ceil(inv_quant * std::abs(color_dct_[c][i])));
497
3.62M
    }
498
113k
  }
499
37.7k
  color[0] += static_cast<uint64_t>(std::ceil(std::abs(y_to_x))) * color[1];
500
37.7k
  color[2] += static_cast<uint64_t>(std::ceil(std::abs(y_to_b))) * color[1];
501
  // This is not taking kChannelWeight into account, but up to constant factors
502
  // it gives an indication of the influence of the color values on the area
503
  // that will need to be rendered.
504
37.7k
  const uint64_t max_color = std::max({color[1], color[0], color[2]});
505
37.7k
  uint64_t logcolor =
506
37.7k
      std::max(kOne, static_cast<uint64_t>(CeilLog2Nonzero(kOne + max_color)));
507
508
37.7k
  const float weight_limit =
509
37.7k
      std::ceil(std::sqrt((static_cast<float>(area_limit) / logcolor) /
510
37.7k
                          std::max<size_t>(1, manhattan_distance)));
511
512
1.24M
  for (int i = 0; i < 32; ++i) {
513
1.20M
    const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
514
1.20M
    result.sigma_dct[i] =
515
1.20M
        sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant;
516
    // If we include the factor kChannelWeight[3]=.3333f here, we get a
517
    // realistic area estimate. We leave it out to simplify the calculations,
518
    // and understand that this way we underestimate the area by a factor of
519
    // 1/(0.3333*0.3333). This is taken into account in the limits below.
520
1.20M
    float weight_f = std::ceil(inv_quant * std::abs(sigma_dct_[i]));
521
1.20M
    uint64_t weight =
522
1.20M
        static_cast<uint64_t>(std::min(weight_limit, std::max(1.0f, weight_f)));
523
1.20M
    width_estimate += weight * weight * logcolor;
524
1.20M
  }
525
37.7k
  *total_estimated_area_reached += (width_estimate * manhattan_distance);
526
37.7k
  if (*total_estimated_area_reached > area_limit) {
527
57
    return JXL_FAILURE("Too large total_estimated_area eached: %" PRIu64,
528
57
                       *total_estimated_area_reached);
529
57
  }
530
531
37.7k
  return true;
532
37.7k
}
533
534
Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map,
535
                               ANSSymbolReader* const decoder,
536
                               BitReader* const br,
537
                               const size_t max_control_points,
538
46.2k
                               size_t* total_num_control_points) {
539
46.2k
  const size_t num_control_points =
540
46.2k
      decoder->ReadHybridUint(kNumControlPointsContext, br, context_map);
541
46.2k
  if (num_control_points > max_control_points) {
542
18
    return JXL_FAILURE("Too many control points: %" PRIuS, num_control_points);
543
18
  }
544
46.2k
  *total_num_control_points += num_control_points;
545
46.2k
  if (*total_num_control_points > max_control_points) {
546
18
    return JXL_FAILURE("Too many control points: %" PRIuS,
547
18
                       *total_num_control_points);
548
18
  }
549
46.1k
  control_points_.resize(num_control_points);
550
  // Maximal image dimension.
551
46.1k
  constexpr int64_t kDeltaLimit = 1u << 30;
552
3.79M
  for (std::pair<int64_t, int64_t>& control_point : control_points_) {
553
3.79M
    control_point.first = UnpackSigned(
554
3.79M
        decoder->ReadHybridUint(kControlPointsContext, br, context_map));
555
3.79M
    control_point.second = UnpackSigned(
556
3.79M
        decoder->ReadHybridUint(kControlPointsContext, br, context_map));
557
    // Check delta-deltas are not outrageous; it is not in spec, but there is
558
    // no reason to allow larger values.
559
3.79M
    if ((control_point.first >= kDeltaLimit) ||
560
3.79M
        (control_point.first <= -kDeltaLimit) ||
561
3.79M
        (control_point.second >= kDeltaLimit) ||
562
3.79M
        (control_point.second <= -kDeltaLimit)) {
563
23
      return JXL_FAILURE("Spline delta-delta is out of bounds");
564
23
    }
565
3.79M
  }
566
567
184k
  const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status {
568
184k
    constexpr int kWeirdNumber = std::numeric_limits<int>::min();
569
6.09M
    for (int i = 0; i < 32; ++i) {
570
5.90M
      dct[i] =
571
5.90M
          UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map));
572
5.90M
      if (dct[i] == kWeirdNumber) {
573
6
        return JXL_FAILURE("The weird number in spline DCT");
574
6
      }
575
5.90M
    }
576
184k
    return true;
577
184k
  };
578
138k
  for (auto& dct : color_dct_) {
579
138k
    JXL_RETURN_IF_ERROR(decode_dct(dct));
580
138k
  }
581
46.1k
  JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_));
582
46.1k
  return true;
583
46.1k
}
584
585
0
void Splines::SetData(SplineDataView data) {
586
0
  Clear();
587
0
  data_ = data;
588
0
}
589
590
71.4k
void Splines::Clear() {
591
71.4k
  quantization_adjustment_ = 0;
592
71.4k
  splines_storage_.clear();
593
71.4k
  starting_points_storage_.clear();
594
71.4k
  data_ = {};
595
71.4k
  segments_.clear();
596
71.4k
  segment_indices_ = AlignedMemory();
597
71.4k
  segment_y_start_ = AlignedMemory();
598
71.4k
}
599
600
2.36k
Status Splines::Decode(jxl::BitReader* br, const size_t num_pixels) {
601
2.36k
  std::vector<uint8_t> context_map;
602
2.36k
  ANSCode code;
603
2.36k
  JXL_RETURN_IF_ERROR(DecodeHistograms(memory_manager_, br, kNumSplineContexts,
604
2.36k
                                       &code, &context_map));
605
4.69k
  JXL_ASSIGN_OR_RETURN(ANSSymbolReader decoder,
606
4.69k
                       ANSSymbolReader::Create(&code, br));
607
4.69k
  size_t num_splines =
608
4.69k
      decoder.ReadHybridUint(kNumSplinesContext, br, context_map);
609
4.69k
  size_t max_control_points = std::min(
610
4.69k
      kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio);
611
4.69k
  if (num_splines > max_control_points ||
612
2.32k
      num_splines + 1 > max_control_points) {
613
25
    return JXL_FAILURE("Too many splines: %" PRIuS, num_splines);
614
25
  }
615
2.32k
  num_splines++;
616
2.32k
  JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(
617
2.32k
      &starting_points_storage_, br, &decoder, context_map, num_splines));
618
619
2.25k
  quantization_adjustment_ = UnpackSigned(
620
2.25k
      decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map));
621
622
2.25k
  splines_storage_.clear();
623
2.25k
  splines_storage_.reserve(num_splines);
624
2.25k
  size_t num_control_points = num_splines;
625
48.4k
  for (size_t i = 0; i < num_splines; ++i) {
626
46.2k
    QuantizedSpline spline;
627
46.2k
    JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br,
628
46.2k
                                      max_control_points, &num_control_points));
629
46.1k
    splines_storage_.push_back(std::move(spline));
630
46.1k
  }
631
632
2.19k
  JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState());
633
634
2.19k
  data_ = SplineDataView{Span<const QuantizedSpline>(splines_storage_),
635
2.19k
                         Span<const Spline::Point>(starting_points_storage_)};
636
637
2.19k
  if (!HasAny()) {
638
0
    return JXL_FAILURE("Decoded splines but got none");
639
0
  }
640
641
2.19k
  return true;
642
2.19k
}
643
644
0
void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect) const {
645
0
  Apply</*add=*/true>(opsin, opsin_rect);
646
0
}
647
void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
648
                       float* JXL_RESTRICT row_b, size_t y, size_t x0,
649
30.5k
                       size_t x1) const {
650
30.5k
  ApplyToRow</*add=*/true>(row_x, row_y, row_b, y, x0, x1);
651
30.5k
}
652
653
350
void Splines::SubtractFrom(Image3F* const opsin) const {
654
350
  Apply</*add=*/false>(opsin, Rect(*opsin));
655
350
}
656
657
Status Splines::InitializeDrawCache(const size_t image_xsize,
658
                                    const size_t image_ysize,
659
2.49k
                                    const ColorCorrelation& color_correlation) {
660
  // TODO(veluca): avoid storing segments that are entirely outside image
661
  // boundaries.
662
2.49k
  segments_.clear();
663
2.49k
  segment_indices_ = AlignedMemory();
664
2.49k
  segment_y_start_ = AlignedMemory();
665
2.49k
  std::vector<SplineSegmentSpan> segments_spans;
666
2.49k
  std::vector<Spline::Point> intermediate_points;
667
2.49k
  uint64_t total_estimated_area_reached = 0;
668
2.49k
  std::vector<Spline> splines;
669
40.2k
  for (size_t i = 0; i < data_.splines.size(); ++i) {
670
37.8k
    Spline spline;
671
37.8k
    JXL_RETURN_IF_ERROR(data_.splines[i].Dequantize(
672
37.8k
        data_.starting_points[i], quantization_adjustment_,
673
37.8k
        color_correlation.YtoXRatio(0), color_correlation.YtoBRatio(0),
674
37.8k
        image_xsize * image_ysize, &total_estimated_area_reached, spline));
675
37.7k
    if (std::adjacent_find(spline.control_points.begin(),
676
37.7k
                           spline.control_points.end()) !=
677
37.7k
        spline.control_points.end()) {
678
      // Otherwise division by zero might occur. Once control points coincide,
679
      // the direction of curve is undefined...
680
4
      return JXL_FAILURE(
681
4
          "identical successive control points in spline %" PRIuS, i);
682
4
    }
683
37.7k
    splines.push_back(spline);
684
37.7k
  }
685
  // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams.
686
2.39k
  if (total_estimated_area_reached >
687
2.39k
      std::min(
688
2.39k
          (8 * image_xsize * image_ysize + (static_cast<uint64_t>(1) << 25)),
689
2.39k
          (static_cast<uint64_t>(1) << 30))) {
690
35
    JXL_WARNING(
691
35
        "Large total_estimated_area_reached, expect slower decoding: %" PRIu64,
692
35
        total_estimated_area_reached);
693
35
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
694
35
    return JXL_FAILURE("Total spline area is too large");
695
35
#endif
696
35
  }
697
698
36.1k
  for (Spline& spline : splines) {
699
36.1k
    std::vector<std::pair<Spline::Point, float>> points_to_draw;
700
14.4M
    auto add_point = [&](const Spline::Point& point, const float multiplier) {
701
14.4M
      points_to_draw.emplace_back(point, multiplier);
702
14.4M
    };
703
36.1k
    intermediate_points.clear();
704
36.1k
    DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points);
705
36.1k
    JXL_RETURN_IF_ERROR(
706
36.1k
        ForEachEquallySpacedPoint(intermediate_points, add_point));
707
36.1k
    const float arc_length =
708
36.1k
        (points_to_draw.size() - 2) * kDesiredRenderingDistance +
709
36.1k
        points_to_draw.back().second;
710
36.1k
    if (arc_length <= 0.f) {
711
      // This spline wouldn't have any effect.
712
16.1k
      continue;
713
16.1k
    }
714
20.0k
    HWY_DYNAMIC_DISPATCH(SegmentsFromPoints)
715
20.0k
    (image_ysize, spline, points_to_draw, arc_length, segments_,
716
20.0k
     segments_spans);
717
20.0k
  }
718
719
2.36k
  size_t segment_y_start_num_bytes = (image_ysize + 2) * sizeof(ptrdiff_t);
720
2.36k
  JXL_ASSIGN_OR_RETURN(
721
2.36k
      segment_y_start_,
722
2.36k
      AlignedMemory::Create(memory_manager_, segment_y_start_num_bytes));
723
2.36k
  ptrdiff_t* segment_y_start = segment_y_start_.address<ptrdiff_t>();
724
2.36k
  memset(segment_y_start, 0, segment_y_start_num_bytes);
725
2.36k
  ptrdiff_t* population = segment_y_start + 1;
726
6.26M
  for (const auto& segment_span : segments_spans) {
727
6.26M
    population[segment_span.start]++;
728
6.26M
    population[segment_span.end]--;
729
6.26M
  }
730
  // Turn to cumulative.
731
2.36k
  size_t total = 0;
732
2.36k
  ptrdiff_t coverage = 0;
733
1.12M
  for (size_t y = 0; y < image_ysize; y++) {
734
1.12M
    if (population[y] < 0) {
735
8.33k
      JXL_ENSURE(coverage >= -population[y]);
736
8.33k
    }
737
1.12M
    coverage += population[y];
738
1.12M
    population[y] = total;
739
1.12M
    total += coverage;
740
1.12M
  }
741
  // population[0] == 0 (that is segment_y_start[1])
742
  // We are going to place segments using population[y] as index for elements
743
  // of y-th line (and increment them); that way final value of population[y]
744
  // is the staring index for (y+1)-th line. Thus segment_y_start will contain
745
  // indices without an offset.
746
2.36k
  JXL_ASSIGN_OR_RETURN(
747
2.36k
      segment_indices_,
748
2.36k
      AlignedMemory::Create(memory_manager_, total * sizeof(size_t)));
749
2.36k
  size_t* segment_indices = segment_indices_.address<size_t>();
750
6.26M
  for (size_t i = 0; i < segments_.size(); ++i) {
751
6.26M
    const auto& segment_span = segments_spans[i];
752
58.4M
    for (size_t y = segment_span.start; y < segment_span.end; y++) {
753
52.1M
      segment_indices[population[y]++] = i;
754
52.1M
    }
755
6.26M
  }
756
757
2.36k
  return true;
758
2.36k
}
759
760
template <bool add>
761
void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
762
                         float* JXL_RESTRICT row_b, size_t y, size_t x0,
763
30.5k
                         size_t x1) const {
764
30.5k
  if (segments_.empty()) return;
765
19.8k
  HWY_DYNAMIC_DISPATCH(DrawSegments)
766
19.8k
  (row_x, row_y, row_b, y, x0, x1, add, segments_.data(),
767
19.8k
   segment_indices_.address<size_t>(), segment_y_start_.address<ptrdiff_t>());
768
19.8k
}
void jxl::Splines::ApplyToRow<true>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const
Line
Count
Source
763
30.5k
                         size_t x1) const {
764
30.5k
  if (segments_.empty()) return;
765
19.8k
  HWY_DYNAMIC_DISPATCH(DrawSegments)
766
19.8k
  (row_x, row_y, row_b, y, x0, x1, add, segments_.data(),
767
19.8k
   segment_indices_.address<size_t>(), segment_y_start_.address<ptrdiff_t>());
768
19.8k
}
Unexecuted instantiation: void jxl::Splines::ApplyToRow<false>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const
769
770
template <bool add>
771
350
void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect) const {
772
350
  if (segments_.empty()) return;
773
0
  const size_t y0 = opsin_rect.y0();
774
0
  const size_t x0 = opsin_rect.x0();
775
0
  const size_t x1 = opsin_rect.x1();
776
0
  for (size_t y = 0; y < opsin_rect.ysize(); y++) {
777
0
    ApplyToRow<add>(opsin->PlaneRow(0, y0 + y) + x0,
778
0
                    opsin->PlaneRow(1, y0 + y) + x0,
779
0
                    opsin->PlaneRow(2, y0 + y) + x0, y0 + y, x0, x1);
780
0
  }
781
0
}
Unexecuted instantiation: void jxl::Splines::Apply<true>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const
void jxl::Splines::Apply<false>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const
Line
Count
Source
771
350
void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect) const {
772
350
  if (segments_.empty()) return;
773
0
  const size_t y0 = opsin_rect.y0();
774
0
  const size_t x0 = opsin_rect.x0();
775
0
  const size_t x1 = opsin_rect.x1();
776
0
  for (size_t y = 0; y < opsin_rect.ysize(); y++) {
777
0
    ApplyToRow<add>(opsin->PlaneRow(0, y0 + y) + x0,
778
0
                    opsin->PlaneRow(1, y0 + y) + x0,
779
0
                    opsin->PlaneRow(2, y0 + y) + x0, y0 + y, x0, x1);
780
0
  }
781
0
}
782
783
}  // namespace jxl
784
#endif  // HWY_ONCE