Coverage Report

Created: 2024-05-21 06:41

/src/libjxl/lib/jxl/splines.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/splines.h"
7
8
#include <jxl/memory_manager.h>
9
10
#include <algorithm>
11
#include <cinttypes>  // PRIu64
12
#include <cmath>
13
#include <limits>
14
15
#include "lib/jxl/base/common.h"
16
#include "lib/jxl/base/printf_macros.h"
17
#include "lib/jxl/base/rect.h"
18
#include "lib/jxl/base/status.h"
19
#include "lib/jxl/chroma_from_luma.h"
20
#include "lib/jxl/common.h"  // JXL_HIGH_PRECISION
21
#include "lib/jxl/dct_scales.h"
22
#include "lib/jxl/dec_ans.h"
23
#include "lib/jxl/dec_bit_reader.h"
24
#include "lib/jxl/pack_signed.h"
25
26
#undef HWY_TARGET_INCLUDE
27
#define HWY_TARGET_INCLUDE "lib/jxl/splines.cc"
28
#include <hwy/foreach_target.h>
29
#include <hwy/highway.h>
30
31
#include "lib/jxl/base/fast_math-inl.h"
32
HWY_BEFORE_NAMESPACE();
33
namespace jxl {
34
namespace HWY_NAMESPACE {
35
namespace {
36
37
// These templates are not found via ADL.
38
using hwy::HWY_NAMESPACE::Mul;
39
using hwy::HWY_NAMESPACE::MulAdd;
40
using hwy::HWY_NAMESPACE::MulSub;
41
using hwy::HWY_NAMESPACE::Sqrt;
42
using hwy::HWY_NAMESPACE::Sub;
43
44
// Given a set of DCT coefficients, this returns the result of performing cosine
45
// interpolation on the original samples.
46
75.0M
float ContinuousIDCT(const Dct32& dct, const float t) {
47
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
48
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
49
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
50
  // treatment of all the coefficients.
51
75.0M
  constexpr float kMultipliers[32] = {
52
75.0M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
53
75.0M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
54
75.0M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
55
75.0M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
56
75.0M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
57
75.0M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
58
75.0M
      kPi / 32 * 30, kPi / 32 * 31,
59
75.0M
  };
60
75.0M
  HWY_CAPPED(float, 32) df;
61
75.0M
  auto result = Zero(df);
62
75.0M
  const auto tandhalf = Set(df, t + 0.5f);
63
571M
  for (int i = 0; i < 32; i += Lanes(df)) {
64
496M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
65
496M
    auto cos = FastCosf(df, cos_arg);
66
496M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
67
496M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
68
496M
  }
69
75.0M
  return GetLane(SumOfLanes(df, result));
70
75.0M
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Line
Count
Source
46
26.1M
float ContinuousIDCT(const Dct32& dct, const float t) {
47
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
48
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
49
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
50
  // treatment of all the coefficients.
51
26.1M
  constexpr float kMultipliers[32] = {
52
26.1M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
53
26.1M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
54
26.1M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
55
26.1M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
56
26.1M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
57
26.1M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
58
26.1M
      kPi / 32 * 30, kPi / 32 * 31,
59
26.1M
  };
60
26.1M
  HWY_CAPPED(float, 32) df;
61
26.1M
  auto result = Zero(df);
62
26.1M
  const auto tandhalf = Set(df, t + 0.5f);
63
130M
  for (int i = 0; i < 32; i += Lanes(df)) {
64
104M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
65
104M
    auto cos = FastCosf(df, cos_arg);
66
104M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
67
104M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
68
104M
  }
69
26.1M
  return GetLane(SumOfLanes(df, result));
70
26.1M
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Line
Count
Source
46
18.4M
float ContinuousIDCT(const Dct32& dct, const float t) {
47
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
48
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
49
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
50
  // treatment of all the coefficients.
51
18.4M
  constexpr float kMultipliers[32] = {
52
18.4M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
53
18.4M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
54
18.4M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
55
18.4M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
56
18.4M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
57
18.4M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
58
18.4M
      kPi / 32 * 30, kPi / 32 * 31,
59
18.4M
  };
60
18.4M
  HWY_CAPPED(float, 32) df;
61
18.4M
  auto result = Zero(df);
62
18.4M
  const auto tandhalf = Set(df, t + 0.5f);
63
166M
  for (int i = 0; i < 32; i += Lanes(df)) {
64
147M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
65
147M
    auto cos = FastCosf(df, cos_arg);
66
147M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
67
147M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
68
147M
  }
69
18.4M
  return GetLane(SumOfLanes(df, result));
70
18.4M
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float)
Line
Count
Source
46
30.4M
float ContinuousIDCT(const Dct32& dct, const float t) {
47
  // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
48
  // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
49
  // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
50
  // treatment of all the coefficients.
51
30.4M
  constexpr float kMultipliers[32] = {
52
30.4M
      kPi / 32 * 0,  kPi / 32 * 1,  kPi / 32 * 2,  kPi / 32 * 3,  kPi / 32 * 4,
53
30.4M
      kPi / 32 * 5,  kPi / 32 * 6,  kPi / 32 * 7,  kPi / 32 * 8,  kPi / 32 * 9,
54
30.4M
      kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
55
30.4M
      kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
56
30.4M
      kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
57
30.4M
      kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
58
30.4M
      kPi / 32 * 30, kPi / 32 * 31,
59
30.4M
  };
60
30.4M
  HWY_CAPPED(float, 32) df;
61
30.4M
  auto result = Zero(df);
62
30.4M
  const auto tandhalf = Set(df, t + 0.5f);
63
274M
  for (int i = 0; i < 32; i += Lanes(df)) {
64
243M
    auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
65
243M
    auto cos = FastCosf(df, cos_arg);
66
243M
    auto local_res = Mul(LoadU(df, dct.data() + i), cos);
67
243M
    result = MulAdd(Set(df, kSqrt2), local_res, result);
68
243M
  }
69
30.4M
  return GetLane(SumOfLanes(df, result));
70
30.4M
}
71
72
template <typename DF>
73
void DrawSegment(DF df, const SplineSegment& segment, const bool add,
74
1.28M
                 const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) {
75
1.28M
  Rebind<int32_t, DF> di;
76
1.28M
  const auto inv_sigma = Set(df, segment.inv_sigma);
77
1.28M
  const auto half = Set(df, 0.5f);
78
1.28M
  const auto one_over_2s2 = Set(df, 0.353553391f);
79
1.28M
  const auto sigma_over_4_times_intensity =
80
1.28M
      Set(df, segment.sigma_over_4_times_intensity);
81
1.28M
  const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x));
82
1.28M
  const auto dy = Set(df, y - segment.center_y);
83
1.28M
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
84
1.28M
  const auto distance = Sqrt(sqd);
85
1.28M
  const auto one_dimensional_factor =
86
1.28M
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
87
1.28M
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
88
1.28M
  auto local_intensity =
89
1.28M
      Mul(sigma_over_4_times_intensity,
90
1.28M
          Mul(one_dimensional_factor, one_dimensional_factor));
91
5.14M
  for (size_t c = 0; c < 3; ++c) {
92
18.4E
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
93
3.85M
    const auto in = LoadU(df, rows[c] + x);
94
3.85M
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
95
3.85M
  }
96
1.28M
}
splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
74
354k
                 const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) {
75
354k
  Rebind<int32_t, DF> di;
76
354k
  const auto inv_sigma = Set(df, segment.inv_sigma);
77
354k
  const auto half = Set(df, 0.5f);
78
354k
  const auto one_over_2s2 = Set(df, 0.353553391f);
79
354k
  const auto sigma_over_4_times_intensity =
80
354k
      Set(df, segment.sigma_over_4_times_intensity);
81
354k
  const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x));
82
354k
  const auto dy = Set(df, y - segment.center_y);
83
354k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
84
354k
  const auto distance = Sqrt(sqd);
85
354k
  const auto one_dimensional_factor =
86
354k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
87
354k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
88
354k
  auto local_intensity =
89
354k
      Mul(sigma_over_4_times_intensity,
90
354k
          Mul(one_dimensional_factor, one_dimensional_factor));
91
1.41M
  for (size_t c = 0; c < 3; ++c) {
92
18.4E
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
93
1.06M
    const auto in = LoadU(df, rows[c] + x);
94
1.06M
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
95
1.06M
  }
96
354k
}
splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
74
314k
                 const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) {
75
314k
  Rebind<int32_t, DF> di;
76
314k
  const auto inv_sigma = Set(df, segment.inv_sigma);
77
314k
  const auto half = Set(df, 0.5f);
78
314k
  const auto one_over_2s2 = Set(df, 0.353553391f);
79
314k
  const auto sigma_over_4_times_intensity =
80
314k
      Set(df, segment.sigma_over_4_times_intensity);
81
314k
  const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x));
82
314k
  const auto dy = Set(df, y - segment.center_y);
83
314k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
84
314k
  const auto distance = Sqrt(sqd);
85
314k
  const auto one_dimensional_factor =
86
314k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
87
314k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
88
314k
  auto local_intensity =
89
314k
      Mul(sigma_over_4_times_intensity,
90
314k
          Mul(one_dimensional_factor, one_dimensional_factor));
91
1.25M
  for (size_t c = 0; c < 3; ++c) {
92
943k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
93
943k
    const auto in = LoadU(df, rows[c] + x);
94
943k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
95
943k
  }
96
314k
}
splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
74
154k
                 const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) {
75
154k
  Rebind<int32_t, DF> di;
76
154k
  const auto inv_sigma = Set(df, segment.inv_sigma);
77
154k
  const auto half = Set(df, 0.5f);
78
154k
  const auto one_over_2s2 = Set(df, 0.353553391f);
79
154k
  const auto sigma_over_4_times_intensity =
80
154k
      Set(df, segment.sigma_over_4_times_intensity);
81
154k
  const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x));
82
154k
  const auto dy = Set(df, y - segment.center_y);
83
154k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
84
154k
  const auto distance = Sqrt(sqd);
85
154k
  const auto one_dimensional_factor =
86
154k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
87
154k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
88
154k
  auto local_intensity =
89
154k
      Mul(sigma_over_4_times_intensity,
90
154k
          Mul(one_dimensional_factor, one_dimensional_factor));
91
616k
  for (size_t c = 0; c < 3; ++c) {
92
462k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
93
462k
    const auto in = LoadU(df, rows[c] + x);
94
462k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
95
462k
  }
96
154k
}
splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
74
41.4k
                 const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) {
75
41.4k
  Rebind<int32_t, DF> di;
76
41.4k
  const auto inv_sigma = Set(df, segment.inv_sigma);
77
41.4k
  const auto half = Set(df, 0.5f);
78
41.4k
  const auto one_over_2s2 = Set(df, 0.353553391f);
79
41.4k
  const auto sigma_over_4_times_intensity =
80
41.4k
      Set(df, segment.sigma_over_4_times_intensity);
81
41.4k
  const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x));
82
41.4k
  const auto dy = Set(df, y - segment.center_y);
83
41.4k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
84
41.4k
  const auto distance = Sqrt(sqd);
85
41.4k
  const auto one_dimensional_factor =
86
41.4k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
87
41.4k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
88
41.4k
  auto local_intensity =
89
41.4k
      Mul(sigma_over_4_times_intensity,
90
41.4k
          Mul(one_dimensional_factor, one_dimensional_factor));
91
165k
  for (size_t c = 0; c < 3; ++c) {
92
124k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
93
124k
    const auto in = LoadU(df, rows[c] + x);
94
124k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
95
124k
  }
96
41.4k
}
splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
74
341k
                 const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) {
75
341k
  Rebind<int32_t, DF> di;
76
341k
  const auto inv_sigma = Set(df, segment.inv_sigma);
77
341k
  const auto half = Set(df, 0.5f);
78
341k
  const auto one_over_2s2 = Set(df, 0.353553391f);
79
341k
  const auto sigma_over_4_times_intensity =
80
341k
      Set(df, segment.sigma_over_4_times_intensity);
81
341k
  const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x));
82
341k
  const auto dy = Set(df, y - segment.center_y);
83
341k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
84
341k
  const auto distance = Sqrt(sqd);
85
341k
  const auto one_dimensional_factor =
86
341k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
87
341k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
88
341k
  auto local_intensity =
89
341k
      Mul(sigma_over_4_times_intensity,
90
341k
          Mul(one_dimensional_factor, one_dimensional_factor));
91
1.36M
  for (size_t c = 0; c < 3; ++c) {
92
1.02M
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
93
1.02M
    const auto in = LoadU(df, rows[c] + x);
94
1.02M
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
95
1.02M
  }
96
341k
}
splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*)
Line
Count
Source
74
79.2k
                 const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) {
75
79.2k
  Rebind<int32_t, DF> di;
76
79.2k
  const auto inv_sigma = Set(df, segment.inv_sigma);
77
79.2k
  const auto half = Set(df, 0.5f);
78
79.2k
  const auto one_over_2s2 = Set(df, 0.353553391f);
79
79.2k
  const auto sigma_over_4_times_intensity =
80
79.2k
      Set(df, segment.sigma_over_4_times_intensity);
81
79.2k
  const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x));
82
79.2k
  const auto dy = Set(df, y - segment.center_y);
83
79.2k
  const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
84
79.2k
  const auto distance = Sqrt(sqd);
85
79.2k
  const auto one_dimensional_factor =
86
79.2k
      Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
87
79.2k
          FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
88
79.2k
  auto local_intensity =
89
79.2k
      Mul(sigma_over_4_times_intensity,
90
79.2k
          Mul(one_dimensional_factor, one_dimensional_factor));
91
316k
  for (size_t c = 0; c < 3; ++c) {
92
237k
    const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
93
237k
    const auto in = LoadU(df, rows[c] + x);
94
237k
    StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
95
237k
  }
96
79.2k
}
97
98
void DrawSegment(const SplineSegment& segment, const bool add, const size_t y,
99
38.6M
                 const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) {
100
38.6M
  ssize_t x = std::max<ssize_t>(
101
38.6M
      x0, std::llround(segment.center_x - segment.maximum_distance));
102
  // one-past-the-end
103
38.6M
  x1 = std::min<ssize_t>(
104
38.6M
      x1, std::llround(segment.center_x + segment.maximum_distance) + 1);
105
38.6M
  HWY_FULL(float) df;
106
39.4M
  for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) {
107
850k
    DrawSegment(df, segment, add, y, x, rows);
108
850k
  }
109
39.0M
  for (; x < x1; ++x) {
110
435k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows);
111
435k
  }
112
38.6M
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, long, long, float* restrict*)
Line
Count
Source
99
10.5M
                 const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) {
100
10.5M
  ssize_t x = std::max<ssize_t>(
101
10.5M
      x0, std::llround(segment.center_x - segment.maximum_distance));
102
  // one-past-the-end
103
10.5M
  x1 = std::min<ssize_t>(
104
10.5M
      x1, std::llround(segment.center_x + segment.maximum_distance) + 1);
105
10.5M
  HWY_FULL(float) df;
106
10.9M
  for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) {
107
354k
    DrawSegment(df, segment, add, y, x, rows);
108
354k
  }
109
10.8M
  for (; x < x1; ++x) {
110
314k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows);
111
314k
  }
112
10.5M
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, long, long, float* restrict*)
Line
Count
Source
99
9.32M
                 const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) {
100
9.32M
  ssize_t x = std::max<ssize_t>(
101
9.32M
      x0, std::llround(segment.center_x - segment.maximum_distance));
102
  // one-past-the-end
103
9.32M
  x1 = std::min<ssize_t>(
104
9.32M
      x1, std::llround(segment.center_x + segment.maximum_distance) + 1);
105
9.32M
  HWY_FULL(float) df;
106
9.48M
  for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) {
107
154k
    DrawSegment(df, segment, add, y, x, rows);
108
154k
  }
109
9.37M
  for (; x < x1; ++x) {
110
41.4k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows);
111
41.4k
  }
112
9.32M
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, long, long, float* restrict*)
Line
Count
Source
99
18.6M
                 const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) {
100
18.6M
  ssize_t x = std::max<ssize_t>(
101
18.6M
      x0, std::llround(segment.center_x - segment.maximum_distance));
102
  // one-past-the-end
103
18.6M
  x1 = std::min<ssize_t>(
104
18.6M
      x1, std::llround(segment.center_x + segment.maximum_distance) + 1);
105
18.6M
  HWY_FULL(float) df;
106
19.0M
  for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) {
107
341k
    DrawSegment(df, segment, add, y, x, rows);
108
341k
  }
109
18.7M
  for (; x < x1; ++x) {
110
79.2k
    DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows);
111
79.2k
  }
112
18.6M
}
113
114
void ComputeSegments(const Spline::Point& center, const float intensity,
115
                     const float color[3], const float sigma,
116
                     std::vector<SplineSegment>& segments,
117
18.7M
                     std::vector<std::pair<size_t, size_t>>& segments_by_y) {
118
  // Sanity check sigma, inverse sigma and intensity
119
18.7M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
120
18.7M
        std::isfinite(intensity))) {
121
196k
    return;
122
196k
  }
123
18.5M
#if JXL_HIGH_PRECISION
124
18.5M
  constexpr float kDistanceExp = 5;
125
#else
126
  // About 30% faster.
127
  constexpr float kDistanceExp = 3;
128
#endif
129
  // We cap from below colors to at least 0.01.
130
18.5M
  float max_color = 0.01f;
131
74.2M
  for (size_t c = 0; c < 3; c++) {
132
55.7M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
133
55.7M
  }
134
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
135
  // below 10^-kDistanceExp.
136
18.5M
  const float maximum_distance =
137
18.5M
      std::sqrt(-2 * sigma * sigma *
138
18.5M
                (std::log(0.1) * kDistanceExp - std::log(max_color)));
139
18.5M
  SplineSegment segment;
140
18.5M
  segment.center_y = center.y;
141
18.5M
  segment.center_x = center.x;
142
18.5M
  memcpy(segment.color, color, sizeof(segment.color));
143
18.5M
  segment.inv_sigma = 1.0f / sigma;
144
18.5M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
145
18.5M
  segment.maximum_distance = maximum_distance;
146
18.5M
  ssize_t y0 = std::llround(center.y - maximum_distance);
147
18.5M
  ssize_t y1 =
148
18.5M
      std::llround(center.y + maximum_distance) + 1;  // one-past-the-end
149
102M
  for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) {
150
83.8M
    segments_by_y.emplace_back(y, segments.size());
151
83.8M
  }
152
18.5M
  segments.push_back(segment);
153
18.5M
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Line
Count
Source
117
6.53M
                     std::vector<std::pair<size_t, size_t>>& segments_by_y) {
118
  // Sanity check sigma, inverse sigma and intensity
119
6.53M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
120
6.53M
        std::isfinite(intensity))) {
121
230
    return;
122
230
  }
123
6.53M
#if JXL_HIGH_PRECISION
124
6.53M
  constexpr float kDistanceExp = 5;
125
#else
126
  // About 30% faster.
127
  constexpr float kDistanceExp = 3;
128
#endif
129
  // We cap from below colors to at least 0.01.
130
6.53M
  float max_color = 0.01f;
131
26.1M
  for (size_t c = 0; c < 3; c++) {
132
19.5M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
133
19.5M
  }
134
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
135
  // below 10^-kDistanceExp.
136
6.53M
  const float maximum_distance =
137
6.53M
      std::sqrt(-2 * sigma * sigma *
138
6.53M
                (std::log(0.1) * kDistanceExp - std::log(max_color)));
139
6.53M
  SplineSegment segment;
140
6.53M
  segment.center_y = center.y;
141
6.53M
  segment.center_x = center.x;
142
6.53M
  memcpy(segment.color, color, sizeof(segment.color));
143
6.53M
  segment.inv_sigma = 1.0f / sigma;
144
6.53M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
145
6.53M
  segment.maximum_distance = maximum_distance;
146
6.53M
  ssize_t y0 = std::llround(center.y - maximum_distance);
147
6.53M
  ssize_t y1 =
148
6.53M
      std::llround(center.y + maximum_distance) + 1;  // one-past-the-end
149
29.7M
  for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) {
150
23.1M
    segments_by_y.emplace_back(y, segments.size());
151
23.1M
  }
152
6.53M
  segments.push_back(segment);
153
6.53M
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Line
Count
Source
117
4.61M
                     std::vector<std::pair<size_t, size_t>>& segments_by_y) {
118
  // Sanity check sigma, inverse sigma and intensity
119
4.61M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
120
4.61M
        std::isfinite(intensity))) {
121
72.6k
    return;
122
72.6k
  }
123
4.54M
#if JXL_HIGH_PRECISION
124
4.54M
  constexpr float kDistanceExp = 5;
125
#else
126
  // About 30% faster.
127
  constexpr float kDistanceExp = 3;
128
#endif
129
  // We cap from below colors to at least 0.01.
130
4.54M
  float max_color = 0.01f;
131
18.1M
  for (size_t c = 0; c < 3; c++) {
132
13.6M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
133
13.6M
  }
134
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
135
  // below 10^-kDistanceExp.
136
4.54M
  const float maximum_distance =
137
4.54M
      std::sqrt(-2 * sigma * sigma *
138
4.54M
                (std::log(0.1) * kDistanceExp - std::log(max_color)));
139
4.54M
  SplineSegment segment;
140
4.54M
  segment.center_y = center.y;
141
4.54M
  segment.center_x = center.x;
142
4.54M
  memcpy(segment.color, color, sizeof(segment.color));
143
4.54M
  segment.inv_sigma = 1.0f / sigma;
144
4.54M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
145
4.54M
  segment.maximum_distance = maximum_distance;
146
4.54M
  ssize_t y0 = std::llround(center.y - maximum_distance);
147
4.54M
  ssize_t y1 =
148
4.54M
      std::llround(center.y + maximum_distance) + 1;  // one-past-the-end
149
23.2M
  for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) {
150
18.6M
    segments_by_y.emplace_back(y, segments.size());
151
18.6M
  }
152
4.54M
  segments.push_back(segment);
153
4.54M
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Line
Count
Source
117
7.61M
                     std::vector<std::pair<size_t, size_t>>& segments_by_y) {
118
  // Sanity check sigma, inverse sigma and intensity
119
7.61M
  if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
120
7.61M
        std::isfinite(intensity))) {
121
123k
    return;
122
123k
  }
123
7.49M
#if JXL_HIGH_PRECISION
124
7.49M
  constexpr float kDistanceExp = 5;
125
#else
126
  // About 30% faster.
127
  constexpr float kDistanceExp = 3;
128
#endif
129
  // We cap from below colors to at least 0.01.
130
7.49M
  float max_color = 0.01f;
131
29.9M
  for (size_t c = 0; c < 3; c++) {
132
22.4M
    max_color = std::max(max_color, std::abs(color[c] * intensity));
133
22.4M
  }
134
  // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
135
  // below 10^-kDistanceExp.
136
7.49M
  const float maximum_distance =
137
7.49M
      std::sqrt(-2 * sigma * sigma *
138
7.49M
                (std::log(0.1) * kDistanceExp - std::log(max_color)));
139
7.49M
  SplineSegment segment;
140
7.49M
  segment.center_y = center.y;
141
7.49M
  segment.center_x = center.x;
142
7.49M
  memcpy(segment.color, color, sizeof(segment.color));
143
7.49M
  segment.inv_sigma = 1.0f / sigma;
144
7.49M
  segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
145
7.49M
  segment.maximum_distance = maximum_distance;
146
7.49M
  ssize_t y0 = std::llround(center.y - maximum_distance);
147
7.49M
  ssize_t y1 =
148
7.49M
      std::llround(center.y + maximum_distance) + 1;  // one-past-the-end
149
49.5M
  for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) {
150
42.0M
    segments_by_y.emplace_back(y, segments.size());
151
42.0M
  }
152
7.49M
  segments.push_back(segment);
153
7.49M
}
154
155
void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
156
                  float* JXL_RESTRICT row_b, const Rect& image_rect,
157
                  const bool add, const SplineSegment* segments,
158
                  const size_t* segment_indices,
159
9.31k
                  const size_t* segment_y_start) {
160
9.31k
  JXL_ASSERT(image_rect.ysize() == 1);
161
9.31k
  float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(),
162
9.31k
                                 row_y - image_rect.x0(),
163
9.31k
                                 row_b - image_rect.x0()};
164
9.31k
  size_t y = image_rect.y0();
165
38.6M
  for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
166
38.6M
    DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(),
167
38.6M
                image_rect.x0() + image_rect.xsize(), rows);
168
38.6M
  }
169
9.31k
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegments(float*, float*, float*, jxl::RectT<unsigned long> const&, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*)
Line
Count
Source
159
4.73k
                  const size_t* segment_y_start) {
160
4.73k
  JXL_ASSERT(image_rect.ysize() == 1);
161
4.73k
  float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(),
162
4.73k
                                 row_y - image_rect.x0(),
163
4.73k
                                 row_b - image_rect.x0()};
164
4.73k
  size_t y = image_rect.y0();
165
10.5M
  for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
166
10.5M
    DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(),
167
10.5M
                image_rect.x0() + image_rect.xsize(), rows);
168
10.5M
  }
169
4.73k
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegments(float*, float*, float*, jxl::RectT<unsigned long> const&, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*)
Line
Count
Source
159
1.88k
                  const size_t* segment_y_start) {
160
1.88k
  JXL_ASSERT(image_rect.ysize() == 1);
161
1.88k
  float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(),
162
1.88k
                                 row_y - image_rect.x0(),
163
1.88k
                                 row_b - image_rect.x0()};
164
1.88k
  size_t y = image_rect.y0();
165
9.33M
  for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
166
9.32M
    DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(),
167
9.32M
                image_rect.x0() + image_rect.xsize(), rows);
168
9.32M
  }
169
1.88k
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegments(float*, float*, float*, jxl::RectT<unsigned long> const&, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*)
Line
Count
Source
159
2.69k
                  const size_t* segment_y_start) {
160
2.69k
  JXL_ASSERT(image_rect.ysize() == 1);
161
2.69k
  float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(),
162
2.69k
                                 row_y - image_rect.x0(),
163
2.69k
                                 row_b - image_rect.x0()};
164
2.69k
  size_t y = image_rect.y0();
165
18.6M
  for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
166
18.6M
    DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(),
167
18.6M
                image_rect.x0() + image_rect.xsize(), rows);
168
18.6M
  }
169
2.69k
}
170
171
void SegmentsFromPoints(
172
    const Spline& spline,
173
    const std::vector<std::pair<Spline::Point, float>>& points_to_draw,
174
    const float arc_length, std::vector<SplineSegment>& segments,
175
3.25k
    std::vector<std::pair<size_t, size_t>>& segments_by_y) {
176
3.25k
  const float inv_arc_length = 1.0f / arc_length;
177
3.25k
  int k = 0;
178
18.7M
  for (const auto& point_to_draw : points_to_draw) {
179
18.7M
    const Spline::Point& point = point_to_draw.first;
180
18.7M
    const float multiplier = point_to_draw.second;
181
18.7M
    const float progress_along_arc =
182
18.7M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
183
18.7M
    ++k;
184
18.7M
    float color[3];
185
75.0M
    for (size_t c = 0; c < 3; ++c) {
186
56.3M
      color[c] =
187
56.3M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
188
56.3M
    }
189
18.7M
    const float sigma =
190
18.7M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
191
18.7M
    ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y);
192
18.7M
  }
193
3.25k
}
splines.cc:jxl::N_AVX2::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Line
Count
Source
175
1.46k
    std::vector<std::pair<size_t, size_t>>& segments_by_y) {
176
1.46k
  const float inv_arc_length = 1.0f / arc_length;
177
1.46k
  int k = 0;
178
6.53M
  for (const auto& point_to_draw : points_to_draw) {
179
6.53M
    const Spline::Point& point = point_to_draw.first;
180
6.53M
    const float multiplier = point_to_draw.second;
181
6.53M
    const float progress_along_arc =
182
6.53M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
183
6.53M
    ++k;
184
6.53M
    float color[3];
185
26.1M
    for (size_t c = 0; c < 3; ++c) {
186
19.5M
      color[c] =
187
19.5M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
188
19.5M
    }
189
6.53M
    const float sigma =
190
6.53M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
191
6.53M
    ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y);
192
6.53M
  }
193
1.46k
}
splines.cc:jxl::N_SSE4::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Line
Count
Source
175
889
    std::vector<std::pair<size_t, size_t>>& segments_by_y) {
176
889
  const float inv_arc_length = 1.0f / arc_length;
177
889
  int k = 0;
178
4.61M
  for (const auto& point_to_draw : points_to_draw) {
179
4.61M
    const Spline::Point& point = point_to_draw.first;
180
4.61M
    const float multiplier = point_to_draw.second;
181
4.61M
    const float progress_along_arc =
182
4.61M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
183
4.61M
    ++k;
184
4.61M
    float color[3];
185
18.4M
    for (size_t c = 0; c < 3; ++c) {
186
13.8M
      color[c] =
187
13.8M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
188
13.8M
    }
189
4.61M
    const float sigma =
190
4.61M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
191
4.61M
    ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y);
192
4.61M
  }
193
889
}
splines.cc:jxl::N_SSE2::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&)
Line
Count
Source
175
894
    std::vector<std::pair<size_t, size_t>>& segments_by_y) {
176
894
  const float inv_arc_length = 1.0f / arc_length;
177
894
  int k = 0;
178
7.61M
  for (const auto& point_to_draw : points_to_draw) {
179
7.61M
    const Spline::Point& point = point_to_draw.first;
180
7.61M
    const float multiplier = point_to_draw.second;
181
7.61M
    const float progress_along_arc =
182
7.61M
        std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
183
7.61M
    ++k;
184
7.61M
    float color[3];
185
30.4M
    for (size_t c = 0; c < 3; ++c) {
186
22.8M
      color[c] =
187
22.8M
          ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
188
22.8M
    }
189
7.61M
    const float sigma =
190
7.61M
        ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
191
7.61M
    ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y);
192
7.61M
  }
193
894
}
194
}  // namespace
195
// NOLINTNEXTLINE(google-readability-namespace-comments)
196
}  // namespace HWY_NAMESPACE
197
}  // namespace jxl
198
HWY_AFTER_NAMESPACE();
199
200
#if HWY_ONCE
201
namespace jxl {
202
HWY_EXPORT(SegmentsFromPoints);
203
HWY_EXPORT(DrawSegments);
204
205
namespace {
206
207
// It is not in spec, but reasonable limit to avoid overflows.
208
template <typename T>
209
417k
Status ValidateSplinePointPos(const T& x, const T& y) {
210
417k
  constexpr T kSplinePosLimit = 1u << 23;
211
417k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
212
417k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
213
123
    return JXL_FAILURE("Spline coordinates out of bounds");
214
123
  }
215
417k
  return true;
216
417k
}
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<long>(long const&, long const&)
Line
Count
Source
209
304k
Status ValidateSplinePointPos(const T& x, const T& y) {
210
304k
  constexpr T kSplinePosLimit = 1u << 23;
211
304k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
212
304k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
213
105
    return JXL_FAILURE("Spline coordinates out of bounds");
214
105
  }
215
304k
  return true;
216
304k
}
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<float>(float const&, float const&)
Line
Count
Source
209
44.3k
Status ValidateSplinePointPos(const T& x, const T& y) {
210
44.3k
  constexpr T kSplinePosLimit = 1u << 23;
211
44.3k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
212
44.3k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
213
0
    return JXL_FAILURE("Spline coordinates out of bounds");
214
0
  }
215
44.3k
  return true;
216
44.3k
}
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<int>(int const&, int const&)
Line
Count
Source
209
68.5k
Status ValidateSplinePointPos(const T& x, const T& y) {
210
68.5k
  constexpr T kSplinePosLimit = 1u << 23;
211
68.5k
  if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
212
68.5k
      (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
213
18
    return JXL_FAILURE("Spline coordinates out of bounds");
214
18
  }
215
68.5k
  return true;
216
68.5k
}
217
218
// Maximum number of spline control points per frame is
219
//   std::min(kMaxNumControlPoints, xsize * ysize / 2)
220
constexpr size_t kMaxNumControlPoints = 1u << 20u;
221
constexpr size_t kMaxNumControlPointsPerPixelRatio = 2;
222
223
0
float AdjustedQuant(const int32_t adjustment) {
224
0
  return (adjustment >= 0) ? (1.f + .125f * adjustment)
225
0
                           : 1.f / (1.f - .125f * adjustment);
226
0
}
227
228
44.3k
float InvAdjustedQuant(const int32_t adjustment) {
229
44.3k
  return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment)
230
44.3k
                           : (1.f - .125f * adjustment);
231
44.3k
}
232
233
// X, Y, B, sigma.
234
constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f};
235
236
Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points,
237
                               BitReader* const br, ANSSymbolReader* reader,
238
                               const std::vector<uint8_t>& context_map,
239
1.10k
                               const size_t num_splines) {
240
1.10k
  points->clear();
241
1.10k
  points->reserve(num_splines);
242
1.10k
  int64_t last_x = 0;
243
1.10k
  int64_t last_y = 0;
244
305k
  for (size_t i = 0; i < num_splines; i++) {
245
304k
    int64_t x =
246
304k
        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
247
304k
    int64_t y =
248
304k
        reader->ReadHybridUint(kStartingPositionContext, br, context_map);
249
304k
    if (i != 0) {
250
303k
      x = UnpackSigned(x) + last_x;
251
303k
      y = UnpackSigned(y) + last_y;
252
303k
    }
253
304k
    JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y));
254
304k
    points->emplace_back(static_cast<float>(x), static_cast<float>(y));
255
304k
    last_x = x;
256
304k
    last_y = y;
257
304k
  }
258
999
  return true;
259
1.10k
}
260
261
struct Vector {
262
  float x, y;
263
0
  Vector operator-() const { return {-x, -y}; }
264
0
  Vector operator+(const Vector& other) const {
265
0
    return {x + other.x, y + other.y};
266
0
  }
267
19.0M
  float SquaredNorm() const { return x * x + y * y; }
268
};
269
20.3M
Vector operator*(const float k, const Vector& vec) {
270
20.3M
  return {k * vec.x, k * vec.y};
271
20.3M
}
272
273
20.3M
Spline::Point operator+(const Spline::Point& p, const Vector& vec) {
274
20.3M
  return {p.x + vec.x, p.y + vec.y};
275
20.3M
}
276
39.4M
Vector operator-(const Spline::Point& a, const Spline::Point& b) {
277
39.4M
  return {a.x - b.x, a.y - b.y};
278
39.4M
}
279
280
// TODO(eustas): avoid making a copy of "points".
281
void DrawCentripetalCatmullRomSpline(std::vector<Spline::Point> points,
282
44.1k
                                     std::vector<Spline::Point>& result) {
283
44.1k
  if (points.empty()) return;
284
44.1k
  if (points.size() == 1) {
285
40.9k
    result.push_back(points[0]);
286
40.9k
    return;
287
40.9k
  }
288
  // Number of points to compute between each control point.
289
3.25k
  static constexpr int kNumPoints = 16;
290
3.25k
  result.reserve((points.size() - 1) * kNumPoints + 1);
291
3.25k
  points.insert(points.begin(), points[0] + (points[0] - points[1]));
292
3.25k
  points.push_back(points[points.size() - 1] +
293
3.25k
                   (points[points.size() - 1] - points[points.size() - 2]));
294
  // points has at least 4 elements at this point.
295
20.9k
  for (size_t start = 0; start < points.size() - 3; ++start) {
296
    // 4 of them are used, and we draw from p[1] to p[2].
297
17.6k
    const Spline::Point* const p = &points[start];
298
17.6k
    result.push_back(p[1]);
299
17.6k
    float d[3];
300
17.6k
    float t[4];
301
17.6k
    t[0] = 0;
302
70.7k
    for (int k = 0; k < 3; ++k) {
303
      // TODO(eustas): for each segment delta is calculated 3 times...
304
      // TODO(eustas): restrict d[k] with reasonable limit and spec it.
305
53.0k
      d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y));
306
53.0k
      t[k + 1] = t[k] + d[k];
307
53.0k
    }
308
283k
    for (int i = 1; i < kNumPoints; ++i) {
309
265k
      const float tt = d[0] + (static_cast<float>(i) / kNumPoints) * d[1];
310
265k
      Spline::Point a[3];
311
1.06M
      for (int k = 0; k < 3; ++k) {
312
        // TODO(eustas): reciprocal multiplication would be faster.
313
796k
        a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]);
314
796k
      }
315
265k
      Spline::Point b[2];
316
796k
      for (int k = 0; k < 2; ++k) {
317
530k
        b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]);
318
530k
      }
319
265k
      result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0]));
320
265k
    }
321
17.6k
  }
322
3.25k
  result.push_back(points[points.size() - 2]);
323
3.25k
}
324
325
// Move along the line segments defined by `points`, `kDesiredRenderingDistance`
326
// pixels at a time, and call `functor` with each point and the actual distance
327
// to the previous point (which will always be kDesiredRenderingDistance except
328
// possibly for the very last point).
329
// TODO(eustas): this method always adds the last point, but never the first
330
//               (unless those are one); I believe both ends matter.
331
template <typename Points, typename Functor>
332
44.1k
void ForEachEquallySpacedPoint(const Points& points, const Functor& functor) {
333
44.1k
  JXL_ASSERT(!points.empty());
334
44.1k
  Spline::Point current = points.front();
335
44.1k
  functor(current, kDesiredRenderingDistance);
336
44.1k
  auto next = points.begin();
337
18.8M
  while (next != points.end()) {
338
18.8M
    const Spline::Point* previous = &current;
339
18.8M
    float arclength_from_previous = 0.f;
340
19.1M
    for (;;) {
341
19.1M
      if (next == points.end()) {
342
44.1k
        functor(*previous, arclength_from_previous);
343
44.1k
        return;
344
44.1k
      }
345
19.0M
      const float arclength_to_next =
346
19.0M
          std::sqrt((*next - *previous).SquaredNorm());
347
19.0M
      if (arclength_from_previous + arclength_to_next >=
348
19.0M
          kDesiredRenderingDistance) {
349
18.7M
        current =
350
18.7M
            *previous + ((kDesiredRenderingDistance - arclength_from_previous) /
351
18.7M
                         arclength_to_next) *
352
18.7M
                            (*next - *previous);
353
18.7M
        functor(current, kDesiredRenderingDistance);
354
18.7M
        break;
355
18.7M
      }
356
327k
      arclength_from_previous += arclength_to_next;
357
327k
      previous = &*next;
358
327k
      ++next;
359
327k
    }
360
18.8M
  }
361
44.1k
}
362
363
}  // namespace
364
365
QuantizedSpline::QuantizedSpline(const Spline& original,
366
                                 const int32_t quantization_adjustment,
367
0
                                 const float y_to_x, const float y_to_b) {
368
0
  JXL_ASSERT(!original.control_points.empty());
369
0
  control_points_.reserve(original.control_points.size() - 1);
370
0
  const Spline::Point& starting_point = original.control_points.front();
371
0
  int previous_x = static_cast<int>(std::roundf(starting_point.x));
372
0
  int previous_y = static_cast<int>(std::roundf(starting_point.y));
373
0
  int previous_delta_x = 0;
374
0
  int previous_delta_y = 0;
375
0
  for (auto it = original.control_points.begin() + 1;
376
0
       it != original.control_points.end(); ++it) {
377
0
    const int new_x = static_cast<int>(std::roundf(it->x));
378
0
    const int new_y = static_cast<int>(std::roundf(it->y));
379
0
    const int new_delta_x = new_x - previous_x;
380
0
    const int new_delta_y = new_y - previous_y;
381
0
    control_points_.emplace_back(new_delta_x - previous_delta_x,
382
0
                                 new_delta_y - previous_delta_y);
383
0
    previous_delta_x = new_delta_x;
384
0
    previous_delta_y = new_delta_y;
385
0
    previous_x = new_x;
386
0
    previous_y = new_y;
387
0
  }
388
389
0
  const auto to_int = [](float v) -> int {
390
    // Maximal int representable with float.
391
0
    constexpr float kMax = std::numeric_limits<int>::max() - 127;
392
0
    constexpr float kMin = -kMax;
393
0
    return static_cast<int>(std::roundf(Clamp1(v, kMin, kMax)));
394
0
  };
395
396
0
  const auto quant = AdjustedQuant(quantization_adjustment);
397
0
  const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
398
0
  for (int c : {1, 0, 2}) {
399
0
    float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b;
400
0
    for (int i = 0; i < 32; ++i) {
401
0
      const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
402
0
      const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
403
0
      auto restored_y =
404
0
          color_dct_[1][i] * inv_dct_factor * kChannelWeight[1] * inv_quant;
405
0
      auto decorrelated = original.color_dct[c][i] - factor * restored_y;
406
0
      color_dct_[c][i] =
407
0
          to_int(decorrelated * dct_factor * quant / kChannelWeight[c]);
408
0
    }
409
0
  }
410
0
  for (int i = 0; i < 32; ++i) {
411
0
    const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
412
0
    sigma_dct_[i] =
413
0
        to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]);
414
0
  }
415
0
}
416
417
Status QuantizedSpline::Dequantize(const Spline::Point& starting_point,
418
                                   const int32_t quantization_adjustment,
419
                                   const float y_to_x, const float y_to_b,
420
                                   const uint64_t image_size,
421
                                   uint64_t* total_estimated_area_reached,
422
44.3k
                                   Spline& result) const {
423
44.3k
  constexpr uint64_t kOne = static_cast<uint64_t>(1);
424
44.3k
  const uint64_t area_limit =
425
44.3k
      std::min(1024 * image_size + (kOne << 32), kOne << 42);
426
427
44.3k
  result.control_points.clear();
428
44.3k
  result.control_points.reserve(control_points_.size() + 1);
429
44.3k
  float px = std::roundf(starting_point.x);
430
44.3k
  float py = std::roundf(starting_point.y);
431
44.3k
  JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py));
432
44.3k
  int current_x = static_cast<int>(px);
433
44.3k
  int current_y = static_cast<int>(py);
434
44.3k
  result.control_points.emplace_back(static_cast<float>(current_x),
435
44.3k
                                     static_cast<float>(current_y));
436
44.3k
  int current_delta_x = 0;
437
44.3k
  int current_delta_y = 0;
438
44.3k
  uint64_t manhattan_distance = 0;
439
44.3k
  for (const auto& point : control_points_) {
440
34.3k
    current_delta_x += point.first;
441
34.3k
    current_delta_y += point.second;
442
34.3k
    manhattan_distance += std::abs(current_delta_x) + std::abs(current_delta_y);
443
34.3k
    if (manhattan_distance > area_limit) {
444
0
      return JXL_FAILURE("Too large manhattan_distance reached: %" PRIu64,
445
0
                         manhattan_distance);
446
0
    }
447
34.3k
    JXL_RETURN_IF_ERROR(
448
34.3k
        ValidateSplinePointPos(current_delta_x, current_delta_y));
449
34.2k
    current_x += current_delta_x;
450
34.2k
    current_y += current_delta_y;
451
34.2k
    JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y));
452
34.2k
    result.control_points.emplace_back(static_cast<float>(current_x),
453
34.2k
                                       static_cast<float>(current_y));
454
34.2k
  }
455
456
44.3k
  const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
457
177k
  for (int c = 0; c < 3; ++c) {
458
4.38M
    for (int i = 0; i < 32; ++i) {
459
4.25M
      const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
460
4.25M
      result.color_dct[c][i] =
461
4.25M
          color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant;
462
4.25M
    }
463
132k
  }
464
1.46M
  for (int i = 0; i < 32; ++i) {
465
1.41M
    result.color_dct[0][i] += y_to_x * result.color_dct[1][i];
466
1.41M
    result.color_dct[2][i] += y_to_b * result.color_dct[1][i];
467
1.41M
  }
468
44.3k
  uint64_t width_estimate = 0;
469
470
44.3k
  uint64_t color[3] = {};
471
177k
  for (int c = 0; c < 3; ++c) {
472
4.38M
    for (int i = 0; i < 32; ++i) {
473
4.25M
      color[c] += static_cast<uint64_t>(
474
4.25M
          std::ceil(inv_quant * std::abs(color_dct_[c][i])));
475
4.25M
    }
476
132k
  }
477
44.3k
  color[0] += static_cast<uint64_t>(std::ceil(std::abs(y_to_x))) * color[1];
478
44.3k
  color[2] += static_cast<uint64_t>(std::ceil(std::abs(y_to_b))) * color[1];
479
  // This is not taking kChannelWeight into account, but up to constant factors
480
  // it gives an indication of the influence of the color values on the area
481
  // that will need to be rendered.
482
44.3k
  const uint64_t max_color = std::max({color[1], color[0], color[2]});
483
44.3k
  uint64_t logcolor =
484
44.3k
      std::max(kOne, static_cast<uint64_t>(CeilLog2Nonzero(kOne + max_color)));
485
486
44.3k
  const float weight_limit =
487
44.3k
      std::ceil(std::sqrt((static_cast<float>(area_limit) / logcolor) /
488
44.3k
                          std::max<size_t>(1, manhattan_distance)));
489
490
1.46M
  for (int i = 0; i < 32; ++i) {
491
1.41M
    const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
492
1.41M
    result.sigma_dct[i] =
493
1.41M
        sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant;
494
    // If we include the factor kChannelWeight[3]=.3333f here, we get a
495
    // realistic area estimate. We leave it out to simplify the calculations,
496
    // and understand that this way we underestimate the area by a factor of
497
    // 1/(0.3333*0.3333). This is taken into account in the limits below.
498
1.41M
    float weight_f = std::ceil(inv_quant * std::abs(sigma_dct_[i]));
499
1.41M
    uint64_t weight =
500
1.41M
        static_cast<uint64_t>(std::min(weight_limit, std::max(1.0f, weight_f)));
501
1.41M
    width_estimate += weight * weight * logcolor;
502
1.41M
  }
503
44.3k
  *total_estimated_area_reached += (width_estimate * manhattan_distance);
504
44.3k
  if (*total_estimated_area_reached > area_limit) {
505
34
    return JXL_FAILURE("Too large total_estimated_area eached: %" PRIu64,
506
34
                       *total_estimated_area_reached);
507
34
  }
508
509
44.2k
  return true;
510
44.3k
}
511
512
Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map,
513
                               ANSSymbolReader* const decoder,
514
                               BitReader* const br,
515
                               const size_t max_control_points,
516
146k
                               size_t* total_num_control_points) {
517
146k
  const size_t num_control_points =
518
146k
      decoder->ReadHybridUint(kNumControlPointsContext, br, context_map);
519
146k
  if (num_control_points > max_control_points) {
520
10
    return JXL_FAILURE("Too many control points: %" PRIuS, num_control_points);
521
10
  }
522
146k
  *total_num_control_points += num_control_points;
523
146k
  if (*total_num_control_points > max_control_points) {
524
21
    return JXL_FAILURE("Too many control points: %" PRIuS,
525
21
                       *total_num_control_points);
526
21
  }
527
146k
  control_points_.resize(num_control_points);
528
  // Maximal image dimension.
529
146k
  constexpr int64_t kDeltaLimit = 1u << 30;
530
851k
  for (std::pair<int64_t, int64_t>& control_point : control_points_) {
531
851k
    control_point.first = UnpackSigned(
532
851k
        decoder->ReadHybridUint(kControlPointsContext, br, context_map));
533
851k
    control_point.second = UnpackSigned(
534
851k
        decoder->ReadHybridUint(kControlPointsContext, br, context_map));
535
    // Check delta-deltas are not outrageous; it is not in spec, but there is
536
    // no reason to allow larger values.
537
851k
    if ((control_point.first >= kDeltaLimit) ||
538
851k
        (control_point.first <= -kDeltaLimit) ||
539
851k
        (control_point.second >= kDeltaLimit) ||
540
851k
        (control_point.second <= -kDeltaLimit)) {
541
14
      return JXL_FAILURE("Spline delta-delta is out of bounds");
542
14
    }
543
851k
  }
544
545
587k
  const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status {
546
587k
    constexpr int kWeirdNumber = std::numeric_limits<int>::min();
547
19.3M
    for (int i = 0; i < 32; ++i) {
548
18.7M
      dct[i] =
549
18.7M
          UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map));
550
18.7M
      if (dct[i] == kWeirdNumber) {
551
8
        return JXL_FAILURE("The weird number in spline DCT");
552
8
      }
553
18.7M
    }
554
587k
    return true;
555
587k
  };
556
440k
  for (auto& dct : color_dct_) {
557
440k
    JXL_RETURN_IF_ERROR(decode_dct(dct));
558
440k
  }
559
146k
  JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_));
560
146k
  return true;
561
146k
}
562
563
37.1k
void Splines::Clear() {
564
37.1k
  quantization_adjustment_ = 0;
565
37.1k
  splines_.clear();
566
37.1k
  starting_points_.clear();
567
37.1k
  segments_.clear();
568
37.1k
  segment_indices_.clear();
569
37.1k
  segment_y_start_.clear();
570
37.1k
}
571
572
Status Splines::Decode(JxlMemoryManager* memory_manager, jxl::BitReader* br,
573
1.14k
                       const size_t num_pixels) {
574
1.14k
  std::vector<uint8_t> context_map;
575
1.14k
  ANSCode code;
576
1.14k
  JXL_RETURN_IF_ERROR(DecodeHistograms(memory_manager, br, kNumSplineContexts,
577
1.14k
                                       &code, &context_map));
578
2.24k
  JXL_ASSIGN_OR_RETURN(ANSSymbolReader decoder,
579
2.24k
                       ANSSymbolReader::Create(&code, br));
580
2.24k
  size_t num_splines =
581
2.24k
      decoder.ReadHybridUint(kNumSplinesContext, br, context_map);
582
2.24k
  size_t max_control_points = std::min(
583
2.24k
      kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio);
584
2.24k
  if (num_splines > max_control_points ||
585
1.12k
      num_splines + 1 > max_control_points) {
586
18
    return JXL_FAILURE("Too many splines: %" PRIuS, num_splines);
587
18
  }
588
1.10k
  num_splines++;
589
1.10k
  JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder,
590
1.10k
                                              context_map, num_splines));
591
592
999
  quantization_adjustment_ = UnpackSigned(
593
999
      decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map));
594
595
999
  splines_.clear();
596
999
  splines_.reserve(num_splines);
597
999
  size_t num_control_points = num_splines;
598
147k
  for (size_t i = 0; i < num_splines; ++i) {
599
146k
    QuantizedSpline spline;
600
146k
    JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br,
601
146k
                                      max_control_points, &num_control_points));
602
146k
    splines_.push_back(std::move(spline));
603
146k
  }
604
605
946
  JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState());
606
607
946
  if (!HasAny()) {
608
0
    return JXL_FAILURE("Decoded splines but got none");
609
0
  }
610
611
946
  return true;
612
946
}
613
614
void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect,
615
0
                    const Rect& image_rect) const {
616
0
  Apply</*add=*/true>(opsin, opsin_rect, image_rect);
617
0
}
618
void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
619
11.4k
                       float* JXL_RESTRICT row_b, const Rect& image_row) const {
620
11.4k
  ApplyToRow</*add=*/true>(row_x, row_y, row_b, image_row);
621
11.4k
}
622
623
0
void Splines::SubtractFrom(Image3F* const opsin) const {
624
0
  Apply</*add=*/false>(opsin, Rect(*opsin), Rect(*opsin));
625
0
}
626
627
Status Splines::InitializeDrawCache(const size_t image_xsize,
628
                                    const size_t image_ysize,
629
845
                                    const ColorCorrelation& color_correlation) {
630
  // TODO(veluca): avoid storing segments that are entirely outside image
631
  // boundaries.
632
845
  segments_.clear();
633
845
  segment_indices_.clear();
634
845
  segment_y_start_.clear();
635
845
  std::vector<std::pair<size_t, size_t>> segments_by_y;
636
845
  std::vector<Spline::Point> intermediate_points;
637
845
  uint64_t total_estimated_area_reached = 0;
638
845
  std::vector<Spline> splines;
639
45.1k
  for (size_t i = 0; i < splines_.size(); ++i) {
640
44.3k
    Spline spline;
641
44.3k
    JXL_RETURN_IF_ERROR(splines_[i].Dequantize(
642
44.3k
        starting_points_[i], quantization_adjustment_,
643
44.3k
        color_correlation.YtoXRatio(0), color_correlation.YtoBRatio(0),
644
44.3k
        image_xsize * image_ysize, &total_estimated_area_reached, spline));
645
44.2k
    if (std::adjacent_find(spline.control_points.begin(),
646
44.2k
                           spline.control_points.end()) !=
647
44.2k
        spline.control_points.end()) {
648
      // Otherwise division by zero might occur. Once control points coincide,
649
      // the direction of curve is undefined...
650
4
      return JXL_FAILURE(
651
4
          "identical successive control points in spline %" PRIuS, i);
652
4
    }
653
44.2k
    splines.push_back(spline);
654
44.2k
  }
655
  // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams.
656
789
  if (total_estimated_area_reached >
657
789
      std::min(
658
789
          (8 * image_xsize * image_ysize + (static_cast<uint64_t>(1) << 25)),
659
789
          (static_cast<uint64_t>(1) << 30))) {
660
13
    JXL_WARNING(
661
13
        "Large total_estimated_area_reached, expect slower decoding: %" PRIu64,
662
13
        total_estimated_area_reached);
663
13
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
664
13
    return JXL_FAILURE("Total spline area is too large");
665
13
#endif
666
13
  }
667
668
44.1k
  for (Spline& spline : splines) {
669
44.1k
    std::vector<std::pair<Spline::Point, float>> points_to_draw;
670
18.8M
    auto add_point = [&](const Spline::Point& point, const float multiplier) {
671
18.8M
      points_to_draw.emplace_back(point, multiplier);
672
18.8M
    };
673
44.1k
    intermediate_points.clear();
674
44.1k
    DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points);
675
44.1k
    ForEachEquallySpacedPoint(intermediate_points, add_point);
676
44.1k
    const float arc_length =
677
44.1k
        (points_to_draw.size() - 2) * kDesiredRenderingDistance +
678
44.1k
        points_to_draw.back().second;
679
44.1k
    if (arc_length <= 0.f) {
680
      // This spline wouldn't have any effect.
681
40.9k
      continue;
682
40.9k
    }
683
3.25k
    HWY_DYNAMIC_DISPATCH(SegmentsFromPoints)
684
3.25k
    (spline, points_to_draw, arc_length, segments_, segments_by_y);
685
3.25k
  }
686
687
  // TODO(eustas): consider linear sorting here.
688
776
  std::sort(segments_by_y.begin(), segments_by_y.end());
689
776
  segment_indices_.resize(segments_by_y.size());
690
776
  segment_y_start_.resize(image_ysize + 1);
691
83.8M
  for (size_t i = 0; i < segments_by_y.size(); i++) {
692
83.8M
    segment_indices_[i] = segments_by_y[i].second;
693
83.8M
    size_t y = segments_by_y[i].first;
694
83.8M
    if (y < image_ysize) {
695
64.9M
      segment_y_start_[y + 1]++;
696
64.9M
    }
697
83.8M
  }
698
90.7k
  for (size_t y = 0; y < image_ysize; y++) {
699
90.0k
    segment_y_start_[y + 1] += segment_y_start_[y];
700
90.0k
  }
701
776
  return true;
702
789
}
703
704
template <bool add>
705
void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
706
                         float* JXL_RESTRICT row_b,
707
11.4k
                         const Rect& image_row) const {
708
11.4k
  if (segments_.empty()) return;
709
9.31k
  JXL_ASSERT(image_row.ysize() == 1);
710
18.6k
  for (size_t iy = 0; iy < image_row.ysize(); iy++) {
711
9.31k
    HWY_DYNAMIC_DISPATCH(DrawSegments)
712
9.31k
    (row_x, row_y, row_b, image_row.Line(iy), add, segments_.data(),
713
9.31k
     segment_indices_.data(), segment_y_start_.data());
714
9.31k
  }
715
9.31k
}
void jxl::Splines::ApplyToRow<true>(float*, float*, float*, jxl::RectT<unsigned long> const&) const
Line
Count
Source
707
11.4k
                         const Rect& image_row) const {
708
11.4k
  if (segments_.empty()) return;
709
9.31k
  JXL_ASSERT(image_row.ysize() == 1);
710
18.6k
  for (size_t iy = 0; iy < image_row.ysize(); iy++) {
711
9.31k
    HWY_DYNAMIC_DISPATCH(DrawSegments)
712
9.31k
    (row_x, row_y, row_b, image_row.Line(iy), add, segments_.data(),
713
9.31k
     segment_indices_.data(), segment_y_start_.data());
714
9.31k
  }
715
9.31k
}
Unexecuted instantiation: void jxl::Splines::ApplyToRow<false>(float*, float*, float*, jxl::RectT<unsigned long> const&) const
716
717
template <bool add>
718
void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect,
719
0
                    const Rect& image_rect) const {
720
0
  if (segments_.empty()) return;
721
0
  for (size_t iy = 0; iy < image_rect.ysize(); iy++) {
722
0
    const size_t y0 = opsin_rect.Line(iy).y0();
723
0
    const size_t x0 = opsin_rect.x0();
724
0
    ApplyToRow<add>(opsin->PlaneRow(0, y0) + x0, opsin->PlaneRow(1, y0) + x0,
725
0
                    opsin->PlaneRow(2, y0) + x0, image_rect.Line(iy));
726
0
  }
727
0
}
Unexecuted instantiation: void jxl::Splines::Apply<true>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&, jxl::RectT<unsigned long> const&) const
Unexecuted instantiation: void jxl::Splines::Apply<false>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&, jxl::RectT<unsigned long> const&) const
728
729
}  // namespace jxl
730
#endif  // HWY_ONCE