/src/libjxl/lib/jxl/splines.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/splines.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | |
10 | | #include <algorithm> |
11 | | #include <cinttypes> // PRIu64 |
12 | | #include <cmath> |
13 | | #include <limits> |
14 | | |
15 | | #include "lib/jxl/base/common.h" |
16 | | #include "lib/jxl/base/printf_macros.h" |
17 | | #include "lib/jxl/base/rect.h" |
18 | | #include "lib/jxl/base/status.h" |
19 | | #include "lib/jxl/chroma_from_luma.h" |
20 | | #include "lib/jxl/common.h" // JXL_HIGH_PRECISION |
21 | | #include "lib/jxl/dct_scales.h" |
22 | | #include "lib/jxl/dec_ans.h" |
23 | | #include "lib/jxl/dec_bit_reader.h" |
24 | | #include "lib/jxl/pack_signed.h" |
25 | | |
26 | | #undef HWY_TARGET_INCLUDE |
27 | | #define HWY_TARGET_INCLUDE "lib/jxl/splines.cc" |
28 | | #include <hwy/foreach_target.h> |
29 | | #include <hwy/highway.h> |
30 | | |
31 | | #include "lib/jxl/base/fast_math-inl.h" |
32 | | HWY_BEFORE_NAMESPACE(); |
33 | | namespace jxl { |
34 | | namespace HWY_NAMESPACE { |
35 | | namespace { |
36 | | |
37 | | // These templates are not found via ADL. |
38 | | using hwy::HWY_NAMESPACE::Mul; |
39 | | using hwy::HWY_NAMESPACE::MulAdd; |
40 | | using hwy::HWY_NAMESPACE::MulSub; |
41 | | using hwy::HWY_NAMESPACE::Sqrt; |
42 | | using hwy::HWY_NAMESPACE::Sub; |
43 | | |
44 | | // Given a set of DCT coefficients, this returns the result of performing cosine |
45 | | // interpolation on the original samples. |
46 | 75.0M | float ContinuousIDCT(const Dct32& dct, const float t) { |
47 | | // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of |
48 | | // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces |
49 | | // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform |
50 | | // treatment of all the coefficients. |
51 | 75.0M | constexpr float kMultipliers[32] = { |
52 | 75.0M | kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, |
53 | 75.0M | kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, |
54 | 75.0M | kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, |
55 | 75.0M | kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, |
56 | 75.0M | kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, |
57 | 75.0M | kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, |
58 | 75.0M | kPi / 32 * 30, kPi / 32 * 31, |
59 | 75.0M | }; |
60 | 75.0M | HWY_CAPPED(float, 32) df; |
61 | 75.0M | auto result = Zero(df); |
62 | 75.0M | const auto tandhalf = Set(df, t + 0.5f); |
63 | 571M | for (int i = 0; i < 32; i += Lanes(df)) { |
64 | 496M | auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); |
65 | 496M | auto cos = FastCosf(df, cos_arg); |
66 | 496M | auto local_res = Mul(LoadU(df, dct.data() + i), cos); |
67 | 496M | result = MulAdd(Set(df, kSqrt2), local_res, result); |
68 | 496M | } |
69 | 75.0M | return GetLane(SumOfLanes(df, result)); |
70 | 75.0M | } splines.cc:jxl::N_AVX2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float) Line | Count | Source | 46 | 26.1M | float ContinuousIDCT(const Dct32& dct, const float t) { | 47 | | // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of | 48 | | // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces | 49 | | // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform | 50 | | // treatment of all the coefficients. | 51 | 26.1M | constexpr float kMultipliers[32] = { | 52 | 26.1M | kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, | 53 | 26.1M | kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, | 54 | 26.1M | kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, | 55 | 26.1M | kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, | 56 | 26.1M | kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, | 57 | 26.1M | kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, | 58 | 26.1M | kPi / 32 * 30, kPi / 32 * 31, | 59 | 26.1M | }; | 60 | 26.1M | HWY_CAPPED(float, 32) df; | 61 | 26.1M | auto result = Zero(df); | 62 | 26.1M | const auto tandhalf = Set(df, t + 0.5f); | 63 | 130M | for (int i = 0; i < 32; i += Lanes(df)) { | 64 | 104M | auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); | 65 | 104M | auto cos = FastCosf(df, cos_arg); | 66 | 104M | auto local_res = Mul(LoadU(df, dct.data() + i), cos); | 67 | 104M | result = MulAdd(Set(df, kSqrt2), local_res, result); | 68 | 104M | } | 69 | 26.1M | return GetLane(SumOfLanes(df, result)); | 70 | 26.1M | } |
splines.cc:jxl::N_SSE4::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float) Line | Count | Source | 46 | 18.4M | float ContinuousIDCT(const Dct32& dct, const float t) { | 47 | | // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of | 48 | | // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces | 49 | | // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform | 50 | | // treatment of all the coefficients. | 51 | 18.4M | constexpr float kMultipliers[32] = { | 52 | 18.4M | kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, | 53 | 18.4M | kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, | 54 | 18.4M | kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, | 55 | 18.4M | kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, | 56 | 18.4M | kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, | 57 | 18.4M | kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, | 58 | 18.4M | kPi / 32 * 30, kPi / 32 * 31, | 59 | 18.4M | }; | 60 | 18.4M | HWY_CAPPED(float, 32) df; | 61 | 18.4M | auto result = Zero(df); | 62 | 18.4M | const auto tandhalf = Set(df, t + 0.5f); | 63 | 166M | for (int i = 0; i < 32; i += Lanes(df)) { | 64 | 147M | auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); | 65 | 147M | auto cos = FastCosf(df, cos_arg); | 66 | 147M | auto local_res = Mul(LoadU(df, dct.data() + i), cos); | 67 | 147M | result = MulAdd(Set(df, kSqrt2), local_res, result); | 68 | 147M | } | 69 | 18.4M | return GetLane(SumOfLanes(df, result)); | 70 | 18.4M | } |
splines.cc:jxl::N_SSE2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float) Line | Count | Source | 46 | 30.4M | float ContinuousIDCT(const Dct32& dct, const float t) { | 47 | | // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of | 48 | | // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces | 49 | | // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform | 50 | | // treatment of all the coefficients. | 51 | 30.4M | constexpr float kMultipliers[32] = { | 52 | 30.4M | kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, | 53 | 30.4M | kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, | 54 | 30.4M | kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, | 55 | 30.4M | kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, | 56 | 30.4M | kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, | 57 | 30.4M | kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, | 58 | 30.4M | kPi / 32 * 30, kPi / 32 * 31, | 59 | 30.4M | }; | 60 | 30.4M | HWY_CAPPED(float, 32) df; | 61 | 30.4M | auto result = Zero(df); | 62 | 30.4M | const auto tandhalf = Set(df, t + 0.5f); | 63 | 274M | for (int i = 0; i < 32; i += Lanes(df)) { | 64 | 243M | auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); | 65 | 243M | auto cos = FastCosf(df, cos_arg); | 66 | 243M | auto local_res = Mul(LoadU(df, dct.data() + i), cos); | 67 | 243M | result = MulAdd(Set(df, kSqrt2), local_res, result); | 68 | 243M | } | 69 | 30.4M | return GetLane(SumOfLanes(df, result)); | 70 | 30.4M | } |
|
71 | | |
72 | | template <typename DF> |
73 | | void DrawSegment(DF df, const SplineSegment& segment, const bool add, |
74 | 1.28M | const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { |
75 | 1.28M | Rebind<int32_t, DF> di; |
76 | 1.28M | const auto inv_sigma = Set(df, segment.inv_sigma); |
77 | 1.28M | const auto half = Set(df, 0.5f); |
78 | 1.28M | const auto one_over_2s2 = Set(df, 0.353553391f); |
79 | 1.28M | const auto sigma_over_4_times_intensity = |
80 | 1.28M | Set(df, segment.sigma_over_4_times_intensity); |
81 | 1.28M | const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); |
82 | 1.28M | const auto dy = Set(df, y - segment.center_y); |
83 | 1.28M | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); |
84 | 1.28M | const auto distance = Sqrt(sqd); |
85 | 1.28M | const auto one_dimensional_factor = |
86 | 1.28M | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), |
87 | 1.28M | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); |
88 | 1.28M | auto local_intensity = |
89 | 1.28M | Mul(sigma_over_4_times_intensity, |
90 | 1.28M | Mul(one_dimensional_factor, one_dimensional_factor)); |
91 | 5.14M | for (size_t c = 0; c < 3; ++c) { |
92 | 18.4E | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); |
93 | 3.85M | const auto in = LoadU(df, rows[c] + x); |
94 | 3.85M | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); |
95 | 3.85M | } |
96 | 1.28M | } splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 74 | 354k | const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { | 75 | 354k | Rebind<int32_t, DF> di; | 76 | 354k | const auto inv_sigma = Set(df, segment.inv_sigma); | 77 | 354k | const auto half = Set(df, 0.5f); | 78 | 354k | const auto one_over_2s2 = Set(df, 0.353553391f); | 79 | 354k | const auto sigma_over_4_times_intensity = | 80 | 354k | Set(df, segment.sigma_over_4_times_intensity); | 81 | 354k | const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); | 82 | 354k | const auto dy = Set(df, y - segment.center_y); | 83 | 354k | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 84 | 354k | const auto distance = Sqrt(sqd); | 85 | 354k | const auto one_dimensional_factor = | 86 | 354k | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 87 | 354k | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 88 | 354k | auto local_intensity = | 89 | 354k | Mul(sigma_over_4_times_intensity, | 90 | 354k | Mul(one_dimensional_factor, one_dimensional_factor)); | 91 | 1.41M | for (size_t c = 0; c < 3; ++c) { | 92 | 18.4E | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 93 | 1.06M | const auto in = LoadU(df, rows[c] + x); | 94 | 1.06M | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 95 | 1.06M | } | 96 | 354k | } |
splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 74 | 314k | const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { | 75 | 314k | Rebind<int32_t, DF> di; | 76 | 314k | const auto inv_sigma = Set(df, segment.inv_sigma); | 77 | 314k | const auto half = Set(df, 0.5f); | 78 | 314k | const auto one_over_2s2 = Set(df, 0.353553391f); | 79 | 314k | const auto sigma_over_4_times_intensity = | 80 | 314k | Set(df, segment.sigma_over_4_times_intensity); | 81 | 314k | const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); | 82 | 314k | const auto dy = Set(df, y - segment.center_y); | 83 | 314k | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 84 | 314k | const auto distance = Sqrt(sqd); | 85 | 314k | const auto one_dimensional_factor = | 86 | 314k | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 87 | 314k | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 88 | 314k | auto local_intensity = | 89 | 314k | Mul(sigma_over_4_times_intensity, | 90 | 314k | Mul(one_dimensional_factor, one_dimensional_factor)); | 91 | 1.25M | for (size_t c = 0; c < 3; ++c) { | 92 | 943k | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 93 | 943k | const auto in = LoadU(df, rows[c] + x); | 94 | 943k | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 95 | 943k | } | 96 | 314k | } |
splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 74 | 154k | const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { | 75 | 154k | Rebind<int32_t, DF> di; | 76 | 154k | const auto inv_sigma = Set(df, segment.inv_sigma); | 77 | 154k | const auto half = Set(df, 0.5f); | 78 | 154k | const auto one_over_2s2 = Set(df, 0.353553391f); | 79 | 154k | const auto sigma_over_4_times_intensity = | 80 | 154k | Set(df, segment.sigma_over_4_times_intensity); | 81 | 154k | const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); | 82 | 154k | const auto dy = Set(df, y - segment.center_y); | 83 | 154k | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 84 | 154k | const auto distance = Sqrt(sqd); | 85 | 154k | const auto one_dimensional_factor = | 86 | 154k | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 87 | 154k | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 88 | 154k | auto local_intensity = | 89 | 154k | Mul(sigma_over_4_times_intensity, | 90 | 154k | Mul(one_dimensional_factor, one_dimensional_factor)); | 91 | 616k | for (size_t c = 0; c < 3; ++c) { | 92 | 462k | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 93 | 462k | const auto in = LoadU(df, rows[c] + x); | 94 | 462k | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 95 | 462k | } | 96 | 154k | } |
splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 74 | 41.4k | const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { | 75 | 41.4k | Rebind<int32_t, DF> di; | 76 | 41.4k | const auto inv_sigma = Set(df, segment.inv_sigma); | 77 | 41.4k | const auto half = Set(df, 0.5f); | 78 | 41.4k | const auto one_over_2s2 = Set(df, 0.353553391f); | 79 | 41.4k | const auto sigma_over_4_times_intensity = | 80 | 41.4k | Set(df, segment.sigma_over_4_times_intensity); | 81 | 41.4k | const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); | 82 | 41.4k | const auto dy = Set(df, y - segment.center_y); | 83 | 41.4k | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 84 | 41.4k | const auto distance = Sqrt(sqd); | 85 | 41.4k | const auto one_dimensional_factor = | 86 | 41.4k | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 87 | 41.4k | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 88 | 41.4k | auto local_intensity = | 89 | 41.4k | Mul(sigma_over_4_times_intensity, | 90 | 41.4k | Mul(one_dimensional_factor, one_dimensional_factor)); | 91 | 165k | for (size_t c = 0; c < 3; ++c) { | 92 | 124k | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 93 | 124k | const auto in = LoadU(df, rows[c] + x); | 94 | 124k | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 95 | 124k | } | 96 | 41.4k | } |
splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 74 | 341k | const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { | 75 | 341k | Rebind<int32_t, DF> di; | 76 | 341k | const auto inv_sigma = Set(df, segment.inv_sigma); | 77 | 341k | const auto half = Set(df, 0.5f); | 78 | 341k | const auto one_over_2s2 = Set(df, 0.353553391f); | 79 | 341k | const auto sigma_over_4_times_intensity = | 80 | 341k | Set(df, segment.sigma_over_4_times_intensity); | 81 | 341k | const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); | 82 | 341k | const auto dy = Set(df, y - segment.center_y); | 83 | 341k | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 84 | 341k | const auto distance = Sqrt(sqd); | 85 | 341k | const auto one_dimensional_factor = | 86 | 341k | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 87 | 341k | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 88 | 341k | auto local_intensity = | 89 | 341k | Mul(sigma_over_4_times_intensity, | 90 | 341k | Mul(one_dimensional_factor, one_dimensional_factor)); | 91 | 1.36M | for (size_t c = 0; c < 3; ++c) { | 92 | 1.02M | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 93 | 1.02M | const auto in = LoadU(df, rows[c] + x); | 94 | 1.02M | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 95 | 1.02M | } | 96 | 341k | } |
splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 74 | 79.2k | const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { | 75 | 79.2k | Rebind<int32_t, DF> di; | 76 | 79.2k | const auto inv_sigma = Set(df, segment.inv_sigma); | 77 | 79.2k | const auto half = Set(df, 0.5f); | 78 | 79.2k | const auto one_over_2s2 = Set(df, 0.353553391f); | 79 | 79.2k | const auto sigma_over_4_times_intensity = | 80 | 79.2k | Set(df, segment.sigma_over_4_times_intensity); | 81 | 79.2k | const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); | 82 | 79.2k | const auto dy = Set(df, y - segment.center_y); | 83 | 79.2k | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 84 | 79.2k | const auto distance = Sqrt(sqd); | 85 | 79.2k | const auto one_dimensional_factor = | 86 | 79.2k | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 87 | 79.2k | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 88 | 79.2k | auto local_intensity = | 89 | 79.2k | Mul(sigma_over_4_times_intensity, | 90 | 79.2k | Mul(one_dimensional_factor, one_dimensional_factor)); | 91 | 316k | for (size_t c = 0; c < 3; ++c) { | 92 | 237k | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 93 | 237k | const auto in = LoadU(df, rows[c] + x); | 94 | 237k | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 95 | 237k | } | 96 | 79.2k | } |
|
97 | | |
98 | | void DrawSegment(const SplineSegment& segment, const bool add, const size_t y, |
99 | 38.6M | const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) { |
100 | 38.6M | ssize_t x = std::max<ssize_t>( |
101 | 38.6M | x0, std::llround(segment.center_x - segment.maximum_distance)); |
102 | | // one-past-the-end |
103 | 38.6M | x1 = std::min<ssize_t>( |
104 | 38.6M | x1, std::llround(segment.center_x + segment.maximum_distance) + 1); |
105 | 38.6M | HWY_FULL(float) df; |
106 | 39.4M | for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) { |
107 | 850k | DrawSegment(df, segment, add, y, x, rows); |
108 | 850k | } |
109 | 39.0M | for (; x < x1; ++x) { |
110 | 435k | DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows); |
111 | 435k | } |
112 | 38.6M | } splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, long, long, float* restrict*) Line | Count | Source | 99 | 10.5M | const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) { | 100 | 10.5M | ssize_t x = std::max<ssize_t>( | 101 | 10.5M | x0, std::llround(segment.center_x - segment.maximum_distance)); | 102 | | // one-past-the-end | 103 | 10.5M | x1 = std::min<ssize_t>( | 104 | 10.5M | x1, std::llround(segment.center_x + segment.maximum_distance) + 1); | 105 | 10.5M | HWY_FULL(float) df; | 106 | 10.9M | for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) { | 107 | 354k | DrawSegment(df, segment, add, y, x, rows); | 108 | 354k | } | 109 | 10.8M | for (; x < x1; ++x) { | 110 | 314k | DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows); | 111 | 314k | } | 112 | 10.5M | } |
splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, long, long, float* restrict*) Line | Count | Source | 99 | 9.32M | const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) { | 100 | 9.32M | ssize_t x = std::max<ssize_t>( | 101 | 9.32M | x0, std::llround(segment.center_x - segment.maximum_distance)); | 102 | | // one-past-the-end | 103 | 9.32M | x1 = std::min<ssize_t>( | 104 | 9.32M | x1, std::llround(segment.center_x + segment.maximum_distance) + 1); | 105 | 9.32M | HWY_FULL(float) df; | 106 | 9.48M | for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) { | 107 | 154k | DrawSegment(df, segment, add, y, x, rows); | 108 | 154k | } | 109 | 9.37M | for (; x < x1; ++x) { | 110 | 41.4k | DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows); | 111 | 41.4k | } | 112 | 9.32M | } |
splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, long, long, float* restrict*) Line | Count | Source | 99 | 18.6M | const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) { | 100 | 18.6M | ssize_t x = std::max<ssize_t>( | 101 | 18.6M | x0, std::llround(segment.center_x - segment.maximum_distance)); | 102 | | // one-past-the-end | 103 | 18.6M | x1 = std::min<ssize_t>( | 104 | 18.6M | x1, std::llround(segment.center_x + segment.maximum_distance) + 1); | 105 | 18.6M | HWY_FULL(float) df; | 106 | 19.0M | for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) { | 107 | 341k | DrawSegment(df, segment, add, y, x, rows); | 108 | 341k | } | 109 | 18.7M | for (; x < x1; ++x) { | 110 | 79.2k | DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows); | 111 | 79.2k | } | 112 | 18.6M | } |
|
113 | | |
114 | | void ComputeSegments(const Spline::Point& center, const float intensity, |
115 | | const float color[3], const float sigma, |
116 | | std::vector<SplineSegment>& segments, |
117 | 18.7M | std::vector<std::pair<size_t, size_t>>& segments_by_y) { |
118 | | // Sanity check sigma, inverse sigma and intensity |
119 | 18.7M | if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && |
120 | 18.7M | std::isfinite(intensity))) { |
121 | 196k | return; |
122 | 196k | } |
123 | 18.5M | #if JXL_HIGH_PRECISION |
124 | 18.5M | constexpr float kDistanceExp = 5; |
125 | | #else |
126 | | // About 30% faster. |
127 | | constexpr float kDistanceExp = 3; |
128 | | #endif |
129 | | // We cap from below colors to at least 0.01. |
130 | 18.5M | float max_color = 0.01f; |
131 | 74.2M | for (size_t c = 0; c < 3; c++) { |
132 | 55.7M | max_color = std::max(max_color, std::abs(color[c] * intensity)); |
133 | 55.7M | } |
134 | | // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops |
135 | | // below 10^-kDistanceExp. |
136 | 18.5M | const float maximum_distance = |
137 | 18.5M | std::sqrt(-2 * sigma * sigma * |
138 | 18.5M | (std::log(0.1) * kDistanceExp - std::log(max_color))); |
139 | 18.5M | SplineSegment segment; |
140 | 18.5M | segment.center_y = center.y; |
141 | 18.5M | segment.center_x = center.x; |
142 | 18.5M | memcpy(segment.color, color, sizeof(segment.color)); |
143 | 18.5M | segment.inv_sigma = 1.0f / sigma; |
144 | 18.5M | segment.sigma_over_4_times_intensity = .25f * sigma * intensity; |
145 | 18.5M | segment.maximum_distance = maximum_distance; |
146 | 18.5M | ssize_t y0 = std::llround(center.y - maximum_distance); |
147 | 18.5M | ssize_t y1 = |
148 | 18.5M | std::llround(center.y + maximum_distance) + 1; // one-past-the-end |
149 | 102M | for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) { |
150 | 83.8M | segments_by_y.emplace_back(y, segments.size()); |
151 | 83.8M | } |
152 | 18.5M | segments.push_back(segment); |
153 | 18.5M | } splines.cc:jxl::N_AVX2::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Line | Count | Source | 117 | 6.53M | std::vector<std::pair<size_t, size_t>>& segments_by_y) { | 118 | | // Sanity check sigma, inverse sigma and intensity | 119 | 6.53M | if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && | 120 | 6.53M | std::isfinite(intensity))) { | 121 | 230 | return; | 122 | 230 | } | 123 | 6.53M | #if JXL_HIGH_PRECISION | 124 | 6.53M | constexpr float kDistanceExp = 5; | 125 | | #else | 126 | | // About 30% faster. | 127 | | constexpr float kDistanceExp = 3; | 128 | | #endif | 129 | | // We cap from below colors to at least 0.01. | 130 | 6.53M | float max_color = 0.01f; | 131 | 26.1M | for (size_t c = 0; c < 3; c++) { | 132 | 19.5M | max_color = std::max(max_color, std::abs(color[c] * intensity)); | 133 | 19.5M | } | 134 | | // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops | 135 | | // below 10^-kDistanceExp. | 136 | 6.53M | const float maximum_distance = | 137 | 6.53M | std::sqrt(-2 * sigma * sigma * | 138 | 6.53M | (std::log(0.1) * kDistanceExp - std::log(max_color))); | 139 | 6.53M | SplineSegment segment; | 140 | 6.53M | segment.center_y = center.y; | 141 | 6.53M | segment.center_x = center.x; | 142 | 6.53M | memcpy(segment.color, color, sizeof(segment.color)); | 143 | 6.53M | segment.inv_sigma = 1.0f / sigma; | 144 | 6.53M | segment.sigma_over_4_times_intensity = .25f * sigma * intensity; | 145 | 6.53M | segment.maximum_distance = maximum_distance; | 146 | 6.53M | ssize_t y0 = std::llround(center.y - maximum_distance); | 147 | 6.53M | ssize_t y1 = | 148 | 6.53M | std::llround(center.y + maximum_distance) + 1; // one-past-the-end | 149 | 29.7M | for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) { | 150 | 23.1M | segments_by_y.emplace_back(y, segments.size()); | 151 | 23.1M | } | 152 | 6.53M | segments.push_back(segment); | 153 | 6.53M | } |
splines.cc:jxl::N_SSE4::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Line | Count | Source | 117 | 4.61M | std::vector<std::pair<size_t, size_t>>& segments_by_y) { | 118 | | // Sanity check sigma, inverse sigma and intensity | 119 | 4.61M | if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && | 120 | 4.61M | std::isfinite(intensity))) { | 121 | 72.6k | return; | 122 | 72.6k | } | 123 | 4.54M | #if JXL_HIGH_PRECISION | 124 | 4.54M | constexpr float kDistanceExp = 5; | 125 | | #else | 126 | | // About 30% faster. | 127 | | constexpr float kDistanceExp = 3; | 128 | | #endif | 129 | | // We cap from below colors to at least 0.01. | 130 | 4.54M | float max_color = 0.01f; | 131 | 18.1M | for (size_t c = 0; c < 3; c++) { | 132 | 13.6M | max_color = std::max(max_color, std::abs(color[c] * intensity)); | 133 | 13.6M | } | 134 | | // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops | 135 | | // below 10^-kDistanceExp. | 136 | 4.54M | const float maximum_distance = | 137 | 4.54M | std::sqrt(-2 * sigma * sigma * | 138 | 4.54M | (std::log(0.1) * kDistanceExp - std::log(max_color))); | 139 | 4.54M | SplineSegment segment; | 140 | 4.54M | segment.center_y = center.y; | 141 | 4.54M | segment.center_x = center.x; | 142 | 4.54M | memcpy(segment.color, color, sizeof(segment.color)); | 143 | 4.54M | segment.inv_sigma = 1.0f / sigma; | 144 | 4.54M | segment.sigma_over_4_times_intensity = .25f * sigma * intensity; | 145 | 4.54M | segment.maximum_distance = maximum_distance; | 146 | 4.54M | ssize_t y0 = std::llround(center.y - maximum_distance); | 147 | 4.54M | ssize_t y1 = | 148 | 4.54M | std::llround(center.y + maximum_distance) + 1; // one-past-the-end | 149 | 23.2M | for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) { | 150 | 18.6M | segments_by_y.emplace_back(y, segments.size()); | 151 | 18.6M | } | 152 | 4.54M | segments.push_back(segment); | 153 | 4.54M | } |
splines.cc:jxl::N_SSE2::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Line | Count | Source | 117 | 7.61M | std::vector<std::pair<size_t, size_t>>& segments_by_y) { | 118 | | // Sanity check sigma, inverse sigma and intensity | 119 | 7.61M | if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && | 120 | 7.61M | std::isfinite(intensity))) { | 121 | 123k | return; | 122 | 123k | } | 123 | 7.49M | #if JXL_HIGH_PRECISION | 124 | 7.49M | constexpr float kDistanceExp = 5; | 125 | | #else | 126 | | // About 30% faster. | 127 | | constexpr float kDistanceExp = 3; | 128 | | #endif | 129 | | // We cap from below colors to at least 0.01. | 130 | 7.49M | float max_color = 0.01f; | 131 | 29.9M | for (size_t c = 0; c < 3; c++) { | 132 | 22.4M | max_color = std::max(max_color, std::abs(color[c] * intensity)); | 133 | 22.4M | } | 134 | | // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops | 135 | | // below 10^-kDistanceExp. | 136 | 7.49M | const float maximum_distance = | 137 | 7.49M | std::sqrt(-2 * sigma * sigma * | 138 | 7.49M | (std::log(0.1) * kDistanceExp - std::log(max_color))); | 139 | 7.49M | SplineSegment segment; | 140 | 7.49M | segment.center_y = center.y; | 141 | 7.49M | segment.center_x = center.x; | 142 | 7.49M | memcpy(segment.color, color, sizeof(segment.color)); | 143 | 7.49M | segment.inv_sigma = 1.0f / sigma; | 144 | 7.49M | segment.sigma_over_4_times_intensity = .25f * sigma * intensity; | 145 | 7.49M | segment.maximum_distance = maximum_distance; | 146 | 7.49M | ssize_t y0 = std::llround(center.y - maximum_distance); | 147 | 7.49M | ssize_t y1 = | 148 | 7.49M | std::llround(center.y + maximum_distance) + 1; // one-past-the-end | 149 | 49.5M | for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) { | 150 | 42.0M | segments_by_y.emplace_back(y, segments.size()); | 151 | 42.0M | } | 152 | 7.49M | segments.push_back(segment); | 153 | 7.49M | } |
|
154 | | |
155 | | void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
156 | | float* JXL_RESTRICT row_b, const Rect& image_rect, |
157 | | const bool add, const SplineSegment* segments, |
158 | | const size_t* segment_indices, |
159 | 9.31k | const size_t* segment_y_start) { |
160 | 9.31k | JXL_ASSERT(image_rect.ysize() == 1); |
161 | 9.31k | float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(), |
162 | 9.31k | row_y - image_rect.x0(), |
163 | 9.31k | row_b - image_rect.x0()}; |
164 | 9.31k | size_t y = image_rect.y0(); |
165 | 38.6M | for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { |
166 | 38.6M | DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(), |
167 | 38.6M | image_rect.x0() + image_rect.xsize(), rows); |
168 | 38.6M | } |
169 | 9.31k | } splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegments(float*, float*, float*, jxl::RectT<unsigned long> const&, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*) Line | Count | Source | 159 | 4.73k | const size_t* segment_y_start) { | 160 | 4.73k | JXL_ASSERT(image_rect.ysize() == 1); | 161 | 4.73k | float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(), | 162 | 4.73k | row_y - image_rect.x0(), | 163 | 4.73k | row_b - image_rect.x0()}; | 164 | 4.73k | size_t y = image_rect.y0(); | 165 | 10.5M | for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { | 166 | 10.5M | DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(), | 167 | 10.5M | image_rect.x0() + image_rect.xsize(), rows); | 168 | 10.5M | } | 169 | 4.73k | } |
splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegments(float*, float*, float*, jxl::RectT<unsigned long> const&, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*) Line | Count | Source | 159 | 1.88k | const size_t* segment_y_start) { | 160 | 1.88k | JXL_ASSERT(image_rect.ysize() == 1); | 161 | 1.88k | float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(), | 162 | 1.88k | row_y - image_rect.x0(), | 163 | 1.88k | row_b - image_rect.x0()}; | 164 | 1.88k | size_t y = image_rect.y0(); | 165 | 9.33M | for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { | 166 | 9.32M | DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(), | 167 | 9.32M | image_rect.x0() + image_rect.xsize(), rows); | 168 | 9.32M | } | 169 | 1.88k | } |
splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegments(float*, float*, float*, jxl::RectT<unsigned long> const&, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*) Line | Count | Source | 159 | 2.69k | const size_t* segment_y_start) { | 160 | 2.69k | JXL_ASSERT(image_rect.ysize() == 1); | 161 | 2.69k | float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(), | 162 | 2.69k | row_y - image_rect.x0(), | 163 | 2.69k | row_b - image_rect.x0()}; | 164 | 2.69k | size_t y = image_rect.y0(); | 165 | 18.6M | for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { | 166 | 18.6M | DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(), | 167 | 18.6M | image_rect.x0() + image_rect.xsize(), rows); | 168 | 18.6M | } | 169 | 2.69k | } |
|
170 | | |
171 | | void SegmentsFromPoints( |
172 | | const Spline& spline, |
173 | | const std::vector<std::pair<Spline::Point, float>>& points_to_draw, |
174 | | const float arc_length, std::vector<SplineSegment>& segments, |
175 | 3.25k | std::vector<std::pair<size_t, size_t>>& segments_by_y) { |
176 | 3.25k | const float inv_arc_length = 1.0f / arc_length; |
177 | 3.25k | int k = 0; |
178 | 18.7M | for (const auto& point_to_draw : points_to_draw) { |
179 | 18.7M | const Spline::Point& point = point_to_draw.first; |
180 | 18.7M | const float multiplier = point_to_draw.second; |
181 | 18.7M | const float progress_along_arc = |
182 | 18.7M | std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); |
183 | 18.7M | ++k; |
184 | 18.7M | float color[3]; |
185 | 75.0M | for (size_t c = 0; c < 3; ++c) { |
186 | 56.3M | color[c] = |
187 | 56.3M | ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); |
188 | 56.3M | } |
189 | 18.7M | const float sigma = |
190 | 18.7M | ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); |
191 | 18.7M | ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y); |
192 | 18.7M | } |
193 | 3.25k | } splines.cc:jxl::N_AVX2::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Line | Count | Source | 175 | 1.46k | std::vector<std::pair<size_t, size_t>>& segments_by_y) { | 176 | 1.46k | const float inv_arc_length = 1.0f / arc_length; | 177 | 1.46k | int k = 0; | 178 | 6.53M | for (const auto& point_to_draw : points_to_draw) { | 179 | 6.53M | const Spline::Point& point = point_to_draw.first; | 180 | 6.53M | const float multiplier = point_to_draw.second; | 181 | 6.53M | const float progress_along_arc = | 182 | 6.53M | std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); | 183 | 6.53M | ++k; | 184 | 6.53M | float color[3]; | 185 | 26.1M | for (size_t c = 0; c < 3; ++c) { | 186 | 19.5M | color[c] = | 187 | 19.5M | ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); | 188 | 19.5M | } | 189 | 6.53M | const float sigma = | 190 | 6.53M | ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); | 191 | 6.53M | ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y); | 192 | 6.53M | } | 193 | 1.46k | } |
splines.cc:jxl::N_SSE4::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Line | Count | Source | 175 | 889 | std::vector<std::pair<size_t, size_t>>& segments_by_y) { | 176 | 889 | const float inv_arc_length = 1.0f / arc_length; | 177 | 889 | int k = 0; | 178 | 4.61M | for (const auto& point_to_draw : points_to_draw) { | 179 | 4.61M | const Spline::Point& point = point_to_draw.first; | 180 | 4.61M | const float multiplier = point_to_draw.second; | 181 | 4.61M | const float progress_along_arc = | 182 | 4.61M | std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); | 183 | 4.61M | ++k; | 184 | 4.61M | float color[3]; | 185 | 18.4M | for (size_t c = 0; c < 3; ++c) { | 186 | 13.8M | color[c] = | 187 | 13.8M | ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); | 188 | 13.8M | } | 189 | 4.61M | const float sigma = | 190 | 4.61M | ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); | 191 | 4.61M | ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y); | 192 | 4.61M | } | 193 | 889 | } |
splines.cc:jxl::N_SSE2::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Line | Count | Source | 175 | 894 | std::vector<std::pair<size_t, size_t>>& segments_by_y) { | 176 | 894 | const float inv_arc_length = 1.0f / arc_length; | 177 | 894 | int k = 0; | 178 | 7.61M | for (const auto& point_to_draw : points_to_draw) { | 179 | 7.61M | const Spline::Point& point = point_to_draw.first; | 180 | 7.61M | const float multiplier = point_to_draw.second; | 181 | 7.61M | const float progress_along_arc = | 182 | 7.61M | std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); | 183 | 7.61M | ++k; | 184 | 7.61M | float color[3]; | 185 | 30.4M | for (size_t c = 0; c < 3; ++c) { | 186 | 22.8M | color[c] = | 187 | 22.8M | ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); | 188 | 22.8M | } | 189 | 7.61M | const float sigma = | 190 | 7.61M | ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); | 191 | 7.61M | ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y); | 192 | 7.61M | } | 193 | 894 | } |
|
194 | | } // namespace |
195 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
196 | | } // namespace HWY_NAMESPACE |
197 | | } // namespace jxl |
198 | | HWY_AFTER_NAMESPACE(); |
199 | | |
200 | | #if HWY_ONCE |
201 | | namespace jxl { |
202 | | HWY_EXPORT(SegmentsFromPoints); |
203 | | HWY_EXPORT(DrawSegments); |
204 | | |
205 | | namespace { |
206 | | |
207 | | // It is not in spec, but reasonable limit to avoid overflows. |
208 | | template <typename T> |
209 | 417k | Status ValidateSplinePointPos(const T& x, const T& y) { |
210 | 417k | constexpr T kSplinePosLimit = 1u << 23; |
211 | 417k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || |
212 | 417k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { |
213 | 123 | return JXL_FAILURE("Spline coordinates out of bounds"); |
214 | 123 | } |
215 | 417k | return true; |
216 | 417k | } splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<long>(long const&, long const&) Line | Count | Source | 209 | 304k | Status ValidateSplinePointPos(const T& x, const T& y) { | 210 | 304k | constexpr T kSplinePosLimit = 1u << 23; | 211 | 304k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 212 | 304k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 213 | 105 | return JXL_FAILURE("Spline coordinates out of bounds"); | 214 | 105 | } | 215 | 304k | return true; | 216 | 304k | } |
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<float>(float const&, float const&) Line | Count | Source | 209 | 44.3k | Status ValidateSplinePointPos(const T& x, const T& y) { | 210 | 44.3k | constexpr T kSplinePosLimit = 1u << 23; | 211 | 44.3k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 212 | 44.3k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 213 | 0 | return JXL_FAILURE("Spline coordinates out of bounds"); | 214 | 0 | } | 215 | 44.3k | return true; | 216 | 44.3k | } |
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<int>(int const&, int const&) Line | Count | Source | 209 | 68.5k | Status ValidateSplinePointPos(const T& x, const T& y) { | 210 | 68.5k | constexpr T kSplinePosLimit = 1u << 23; | 211 | 68.5k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 212 | 68.5k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 213 | 18 | return JXL_FAILURE("Spline coordinates out of bounds"); | 214 | 18 | } | 215 | 68.5k | return true; | 216 | 68.5k | } |
|
217 | | |
218 | | // Maximum number of spline control points per frame is |
219 | | // std::min(kMaxNumControlPoints, xsize * ysize / 2) |
220 | | constexpr size_t kMaxNumControlPoints = 1u << 20u; |
221 | | constexpr size_t kMaxNumControlPointsPerPixelRatio = 2; |
222 | | |
223 | 0 | float AdjustedQuant(const int32_t adjustment) { |
224 | 0 | return (adjustment >= 0) ? (1.f + .125f * adjustment) |
225 | 0 | : 1.f / (1.f - .125f * adjustment); |
226 | 0 | } |
227 | | |
228 | 44.3k | float InvAdjustedQuant(const int32_t adjustment) { |
229 | 44.3k | return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment) |
230 | 44.3k | : (1.f - .125f * adjustment); |
231 | 44.3k | } |
232 | | |
233 | | // X, Y, B, sigma. |
234 | | constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f}; |
235 | | |
236 | | Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points, |
237 | | BitReader* const br, ANSSymbolReader* reader, |
238 | | const std::vector<uint8_t>& context_map, |
239 | 1.10k | const size_t num_splines) { |
240 | 1.10k | points->clear(); |
241 | 1.10k | points->reserve(num_splines); |
242 | 1.10k | int64_t last_x = 0; |
243 | 1.10k | int64_t last_y = 0; |
244 | 305k | for (size_t i = 0; i < num_splines; i++) { |
245 | 304k | int64_t x = |
246 | 304k | reader->ReadHybridUint(kStartingPositionContext, br, context_map); |
247 | 304k | int64_t y = |
248 | 304k | reader->ReadHybridUint(kStartingPositionContext, br, context_map); |
249 | 304k | if (i != 0) { |
250 | 303k | x = UnpackSigned(x) + last_x; |
251 | 303k | y = UnpackSigned(y) + last_y; |
252 | 303k | } |
253 | 304k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y)); |
254 | 304k | points->emplace_back(static_cast<float>(x), static_cast<float>(y)); |
255 | 304k | last_x = x; |
256 | 304k | last_y = y; |
257 | 304k | } |
258 | 999 | return true; |
259 | 1.10k | } |
260 | | |
261 | | struct Vector { |
262 | | float x, y; |
263 | 0 | Vector operator-() const { return {-x, -y}; } |
264 | 0 | Vector operator+(const Vector& other) const { |
265 | 0 | return {x + other.x, y + other.y}; |
266 | 0 | } |
267 | 19.0M | float SquaredNorm() const { return x * x + y * y; } |
268 | | }; |
269 | 20.3M | Vector operator*(const float k, const Vector& vec) { |
270 | 20.3M | return {k * vec.x, k * vec.y}; |
271 | 20.3M | } |
272 | | |
273 | 20.3M | Spline::Point operator+(const Spline::Point& p, const Vector& vec) { |
274 | 20.3M | return {p.x + vec.x, p.y + vec.y}; |
275 | 20.3M | } |
276 | 39.4M | Vector operator-(const Spline::Point& a, const Spline::Point& b) { |
277 | 39.4M | return {a.x - b.x, a.y - b.y}; |
278 | 39.4M | } |
279 | | |
280 | | // TODO(eustas): avoid making a copy of "points". |
281 | | void DrawCentripetalCatmullRomSpline(std::vector<Spline::Point> points, |
282 | 44.1k | std::vector<Spline::Point>& result) { |
283 | 44.1k | if (points.empty()) return; |
284 | 44.1k | if (points.size() == 1) { |
285 | 40.9k | result.push_back(points[0]); |
286 | 40.9k | return; |
287 | 40.9k | } |
288 | | // Number of points to compute between each control point. |
289 | 3.25k | static constexpr int kNumPoints = 16; |
290 | 3.25k | result.reserve((points.size() - 1) * kNumPoints + 1); |
291 | 3.25k | points.insert(points.begin(), points[0] + (points[0] - points[1])); |
292 | 3.25k | points.push_back(points[points.size() - 1] + |
293 | 3.25k | (points[points.size() - 1] - points[points.size() - 2])); |
294 | | // points has at least 4 elements at this point. |
295 | 20.9k | for (size_t start = 0; start < points.size() - 3; ++start) { |
296 | | // 4 of them are used, and we draw from p[1] to p[2]. |
297 | 17.6k | const Spline::Point* const p = &points[start]; |
298 | 17.6k | result.push_back(p[1]); |
299 | 17.6k | float d[3]; |
300 | 17.6k | float t[4]; |
301 | 17.6k | t[0] = 0; |
302 | 70.7k | for (int k = 0; k < 3; ++k) { |
303 | | // TODO(eustas): for each segment delta is calculated 3 times... |
304 | | // TODO(eustas): restrict d[k] with reasonable limit and spec it. |
305 | 53.0k | d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y)); |
306 | 53.0k | t[k + 1] = t[k] + d[k]; |
307 | 53.0k | } |
308 | 283k | for (int i = 1; i < kNumPoints; ++i) { |
309 | 265k | const float tt = d[0] + (static_cast<float>(i) / kNumPoints) * d[1]; |
310 | 265k | Spline::Point a[3]; |
311 | 1.06M | for (int k = 0; k < 3; ++k) { |
312 | | // TODO(eustas): reciprocal multiplication would be faster. |
313 | 796k | a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]); |
314 | 796k | } |
315 | 265k | Spline::Point b[2]; |
316 | 796k | for (int k = 0; k < 2; ++k) { |
317 | 530k | b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]); |
318 | 530k | } |
319 | 265k | result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0])); |
320 | 265k | } |
321 | 17.6k | } |
322 | 3.25k | result.push_back(points[points.size() - 2]); |
323 | 3.25k | } |
324 | | |
325 | | // Move along the line segments defined by `points`, `kDesiredRenderingDistance` |
326 | | // pixels at a time, and call `functor` with each point and the actual distance |
327 | | // to the previous point (which will always be kDesiredRenderingDistance except |
328 | | // possibly for the very last point). |
329 | | // TODO(eustas): this method always adds the last point, but never the first |
330 | | // (unless those are one); I believe both ends matter. |
331 | | template <typename Points, typename Functor> |
332 | 44.1k | void ForEachEquallySpacedPoint(const Points& points, const Functor& functor) { |
333 | 44.1k | JXL_ASSERT(!points.empty()); |
334 | 44.1k | Spline::Point current = points.front(); |
335 | 44.1k | functor(current, kDesiredRenderingDistance); |
336 | 44.1k | auto next = points.begin(); |
337 | 18.8M | while (next != points.end()) { |
338 | 18.8M | const Spline::Point* previous = ¤t; |
339 | 18.8M | float arclength_from_previous = 0.f; |
340 | 19.1M | for (;;) { |
341 | 19.1M | if (next == points.end()) { |
342 | 44.1k | functor(*previous, arclength_from_previous); |
343 | 44.1k | return; |
344 | 44.1k | } |
345 | 19.0M | const float arclength_to_next = |
346 | 19.0M | std::sqrt((*next - *previous).SquaredNorm()); |
347 | 19.0M | if (arclength_from_previous + arclength_to_next >= |
348 | 19.0M | kDesiredRenderingDistance) { |
349 | 18.7M | current = |
350 | 18.7M | *previous + ((kDesiredRenderingDistance - arclength_from_previous) / |
351 | 18.7M | arclength_to_next) * |
352 | 18.7M | (*next - *previous); |
353 | 18.7M | functor(current, kDesiredRenderingDistance); |
354 | 18.7M | break; |
355 | 18.7M | } |
356 | 327k | arclength_from_previous += arclength_to_next; |
357 | 327k | previous = &*next; |
358 | 327k | ++next; |
359 | 327k | } |
360 | 18.8M | } |
361 | 44.1k | } |
362 | | |
363 | | } // namespace |
364 | | |
365 | | QuantizedSpline::QuantizedSpline(const Spline& original, |
366 | | const int32_t quantization_adjustment, |
367 | 0 | const float y_to_x, const float y_to_b) { |
368 | 0 | JXL_ASSERT(!original.control_points.empty()); |
369 | 0 | control_points_.reserve(original.control_points.size() - 1); |
370 | 0 | const Spline::Point& starting_point = original.control_points.front(); |
371 | 0 | int previous_x = static_cast<int>(std::roundf(starting_point.x)); |
372 | 0 | int previous_y = static_cast<int>(std::roundf(starting_point.y)); |
373 | 0 | int previous_delta_x = 0; |
374 | 0 | int previous_delta_y = 0; |
375 | 0 | for (auto it = original.control_points.begin() + 1; |
376 | 0 | it != original.control_points.end(); ++it) { |
377 | 0 | const int new_x = static_cast<int>(std::roundf(it->x)); |
378 | 0 | const int new_y = static_cast<int>(std::roundf(it->y)); |
379 | 0 | const int new_delta_x = new_x - previous_x; |
380 | 0 | const int new_delta_y = new_y - previous_y; |
381 | 0 | control_points_.emplace_back(new_delta_x - previous_delta_x, |
382 | 0 | new_delta_y - previous_delta_y); |
383 | 0 | previous_delta_x = new_delta_x; |
384 | 0 | previous_delta_y = new_delta_y; |
385 | 0 | previous_x = new_x; |
386 | 0 | previous_y = new_y; |
387 | 0 | } |
388 | |
|
389 | 0 | const auto to_int = [](float v) -> int { |
390 | | // Maximal int representable with float. |
391 | 0 | constexpr float kMax = std::numeric_limits<int>::max() - 127; |
392 | 0 | constexpr float kMin = -kMax; |
393 | 0 | return static_cast<int>(std::roundf(Clamp1(v, kMin, kMax))); |
394 | 0 | }; |
395 | |
|
396 | 0 | const auto quant = AdjustedQuant(quantization_adjustment); |
397 | 0 | const auto inv_quant = InvAdjustedQuant(quantization_adjustment); |
398 | 0 | for (int c : {1, 0, 2}) { |
399 | 0 | float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b; |
400 | 0 | for (int i = 0; i < 32; ++i) { |
401 | 0 | const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; |
402 | 0 | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
403 | 0 | auto restored_y = |
404 | 0 | color_dct_[1][i] * inv_dct_factor * kChannelWeight[1] * inv_quant; |
405 | 0 | auto decorrelated = original.color_dct[c][i] - factor * restored_y; |
406 | 0 | color_dct_[c][i] = |
407 | 0 | to_int(decorrelated * dct_factor * quant / kChannelWeight[c]); |
408 | 0 | } |
409 | 0 | } |
410 | 0 | for (int i = 0; i < 32; ++i) { |
411 | 0 | const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; |
412 | 0 | sigma_dct_[i] = |
413 | 0 | to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]); |
414 | 0 | } |
415 | 0 | } |
416 | | |
417 | | Status QuantizedSpline::Dequantize(const Spline::Point& starting_point, |
418 | | const int32_t quantization_adjustment, |
419 | | const float y_to_x, const float y_to_b, |
420 | | const uint64_t image_size, |
421 | | uint64_t* total_estimated_area_reached, |
422 | 44.3k | Spline& result) const { |
423 | 44.3k | constexpr uint64_t kOne = static_cast<uint64_t>(1); |
424 | 44.3k | const uint64_t area_limit = |
425 | 44.3k | std::min(1024 * image_size + (kOne << 32), kOne << 42); |
426 | | |
427 | 44.3k | result.control_points.clear(); |
428 | 44.3k | result.control_points.reserve(control_points_.size() + 1); |
429 | 44.3k | float px = std::roundf(starting_point.x); |
430 | 44.3k | float py = std::roundf(starting_point.y); |
431 | 44.3k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py)); |
432 | 44.3k | int current_x = static_cast<int>(px); |
433 | 44.3k | int current_y = static_cast<int>(py); |
434 | 44.3k | result.control_points.emplace_back(static_cast<float>(current_x), |
435 | 44.3k | static_cast<float>(current_y)); |
436 | 44.3k | int current_delta_x = 0; |
437 | 44.3k | int current_delta_y = 0; |
438 | 44.3k | uint64_t manhattan_distance = 0; |
439 | 44.3k | for (const auto& point : control_points_) { |
440 | 34.3k | current_delta_x += point.first; |
441 | 34.3k | current_delta_y += point.second; |
442 | 34.3k | manhattan_distance += std::abs(current_delta_x) + std::abs(current_delta_y); |
443 | 34.3k | if (manhattan_distance > area_limit) { |
444 | 0 | return JXL_FAILURE("Too large manhattan_distance reached: %" PRIu64, |
445 | 0 | manhattan_distance); |
446 | 0 | } |
447 | 34.3k | JXL_RETURN_IF_ERROR( |
448 | 34.3k | ValidateSplinePointPos(current_delta_x, current_delta_y)); |
449 | 34.2k | current_x += current_delta_x; |
450 | 34.2k | current_y += current_delta_y; |
451 | 34.2k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y)); |
452 | 34.2k | result.control_points.emplace_back(static_cast<float>(current_x), |
453 | 34.2k | static_cast<float>(current_y)); |
454 | 34.2k | } |
455 | | |
456 | 44.3k | const auto inv_quant = InvAdjustedQuant(quantization_adjustment); |
457 | 177k | for (int c = 0; c < 3; ++c) { |
458 | 4.38M | for (int i = 0; i < 32; ++i) { |
459 | 4.25M | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
460 | 4.25M | result.color_dct[c][i] = |
461 | 4.25M | color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant; |
462 | 4.25M | } |
463 | 132k | } |
464 | 1.46M | for (int i = 0; i < 32; ++i) { |
465 | 1.41M | result.color_dct[0][i] += y_to_x * result.color_dct[1][i]; |
466 | 1.41M | result.color_dct[2][i] += y_to_b * result.color_dct[1][i]; |
467 | 1.41M | } |
468 | 44.3k | uint64_t width_estimate = 0; |
469 | | |
470 | 44.3k | uint64_t color[3] = {}; |
471 | 177k | for (int c = 0; c < 3; ++c) { |
472 | 4.38M | for (int i = 0; i < 32; ++i) { |
473 | 4.25M | color[c] += static_cast<uint64_t>( |
474 | 4.25M | std::ceil(inv_quant * std::abs(color_dct_[c][i]))); |
475 | 4.25M | } |
476 | 132k | } |
477 | 44.3k | color[0] += static_cast<uint64_t>(std::ceil(std::abs(y_to_x))) * color[1]; |
478 | 44.3k | color[2] += static_cast<uint64_t>(std::ceil(std::abs(y_to_b))) * color[1]; |
479 | | // This is not taking kChannelWeight into account, but up to constant factors |
480 | | // it gives an indication of the influence of the color values on the area |
481 | | // that will need to be rendered. |
482 | 44.3k | const uint64_t max_color = std::max({color[1], color[0], color[2]}); |
483 | 44.3k | uint64_t logcolor = |
484 | 44.3k | std::max(kOne, static_cast<uint64_t>(CeilLog2Nonzero(kOne + max_color))); |
485 | | |
486 | 44.3k | const float weight_limit = |
487 | 44.3k | std::ceil(std::sqrt((static_cast<float>(area_limit) / logcolor) / |
488 | 44.3k | std::max<size_t>(1, manhattan_distance))); |
489 | | |
490 | 1.46M | for (int i = 0; i < 32; ++i) { |
491 | 1.41M | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
492 | 1.41M | result.sigma_dct[i] = |
493 | 1.41M | sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant; |
494 | | // If we include the factor kChannelWeight[3]=.3333f here, we get a |
495 | | // realistic area estimate. We leave it out to simplify the calculations, |
496 | | // and understand that this way we underestimate the area by a factor of |
497 | | // 1/(0.3333*0.3333). This is taken into account in the limits below. |
498 | 1.41M | float weight_f = std::ceil(inv_quant * std::abs(sigma_dct_[i])); |
499 | 1.41M | uint64_t weight = |
500 | 1.41M | static_cast<uint64_t>(std::min(weight_limit, std::max(1.0f, weight_f))); |
501 | 1.41M | width_estimate += weight * weight * logcolor; |
502 | 1.41M | } |
503 | 44.3k | *total_estimated_area_reached += (width_estimate * manhattan_distance); |
504 | 44.3k | if (*total_estimated_area_reached > area_limit) { |
505 | 34 | return JXL_FAILURE("Too large total_estimated_area eached: %" PRIu64, |
506 | 34 | *total_estimated_area_reached); |
507 | 34 | } |
508 | | |
509 | 44.2k | return true; |
510 | 44.3k | } |
511 | | |
512 | | Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map, |
513 | | ANSSymbolReader* const decoder, |
514 | | BitReader* const br, |
515 | | const size_t max_control_points, |
516 | 146k | size_t* total_num_control_points) { |
517 | 146k | const size_t num_control_points = |
518 | 146k | decoder->ReadHybridUint(kNumControlPointsContext, br, context_map); |
519 | 146k | if (num_control_points > max_control_points) { |
520 | 10 | return JXL_FAILURE("Too many control points: %" PRIuS, num_control_points); |
521 | 10 | } |
522 | 146k | *total_num_control_points += num_control_points; |
523 | 146k | if (*total_num_control_points > max_control_points) { |
524 | 21 | return JXL_FAILURE("Too many control points: %" PRIuS, |
525 | 21 | *total_num_control_points); |
526 | 21 | } |
527 | 146k | control_points_.resize(num_control_points); |
528 | | // Maximal image dimension. |
529 | 146k | constexpr int64_t kDeltaLimit = 1u << 30; |
530 | 851k | for (std::pair<int64_t, int64_t>& control_point : control_points_) { |
531 | 851k | control_point.first = UnpackSigned( |
532 | 851k | decoder->ReadHybridUint(kControlPointsContext, br, context_map)); |
533 | 851k | control_point.second = UnpackSigned( |
534 | 851k | decoder->ReadHybridUint(kControlPointsContext, br, context_map)); |
535 | | // Check delta-deltas are not outrageous; it is not in spec, but there is |
536 | | // no reason to allow larger values. |
537 | 851k | if ((control_point.first >= kDeltaLimit) || |
538 | 851k | (control_point.first <= -kDeltaLimit) || |
539 | 851k | (control_point.second >= kDeltaLimit) || |
540 | 851k | (control_point.second <= -kDeltaLimit)) { |
541 | 14 | return JXL_FAILURE("Spline delta-delta is out of bounds"); |
542 | 14 | } |
543 | 851k | } |
544 | | |
545 | 587k | const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status { |
546 | 587k | constexpr int kWeirdNumber = std::numeric_limits<int>::min(); |
547 | 19.3M | for (int i = 0; i < 32; ++i) { |
548 | 18.7M | dct[i] = |
549 | 18.7M | UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map)); |
550 | 18.7M | if (dct[i] == kWeirdNumber) { |
551 | 8 | return JXL_FAILURE("The weird number in spline DCT"); |
552 | 8 | } |
553 | 18.7M | } |
554 | 587k | return true; |
555 | 587k | }; |
556 | 440k | for (auto& dct : color_dct_) { |
557 | 440k | JXL_RETURN_IF_ERROR(decode_dct(dct)); |
558 | 440k | } |
559 | 146k | JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_)); |
560 | 146k | return true; |
561 | 146k | } |
562 | | |
563 | 37.1k | void Splines::Clear() { |
564 | 37.1k | quantization_adjustment_ = 0; |
565 | 37.1k | splines_.clear(); |
566 | 37.1k | starting_points_.clear(); |
567 | 37.1k | segments_.clear(); |
568 | 37.1k | segment_indices_.clear(); |
569 | 37.1k | segment_y_start_.clear(); |
570 | 37.1k | } |
571 | | |
572 | | Status Splines::Decode(JxlMemoryManager* memory_manager, jxl::BitReader* br, |
573 | 1.14k | const size_t num_pixels) { |
574 | 1.14k | std::vector<uint8_t> context_map; |
575 | 1.14k | ANSCode code; |
576 | 1.14k | JXL_RETURN_IF_ERROR(DecodeHistograms(memory_manager, br, kNumSplineContexts, |
577 | 1.14k | &code, &context_map)); |
578 | 2.24k | JXL_ASSIGN_OR_RETURN(ANSSymbolReader decoder, |
579 | 2.24k | ANSSymbolReader::Create(&code, br)); |
580 | 2.24k | size_t num_splines = |
581 | 2.24k | decoder.ReadHybridUint(kNumSplinesContext, br, context_map); |
582 | 2.24k | size_t max_control_points = std::min( |
583 | 2.24k | kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio); |
584 | 2.24k | if (num_splines > max_control_points || |
585 | 1.12k | num_splines + 1 > max_control_points) { |
586 | 18 | return JXL_FAILURE("Too many splines: %" PRIuS, num_splines); |
587 | 18 | } |
588 | 1.10k | num_splines++; |
589 | 1.10k | JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder, |
590 | 1.10k | context_map, num_splines)); |
591 | | |
592 | 999 | quantization_adjustment_ = UnpackSigned( |
593 | 999 | decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map)); |
594 | | |
595 | 999 | splines_.clear(); |
596 | 999 | splines_.reserve(num_splines); |
597 | 999 | size_t num_control_points = num_splines; |
598 | 147k | for (size_t i = 0; i < num_splines; ++i) { |
599 | 146k | QuantizedSpline spline; |
600 | 146k | JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br, |
601 | 146k | max_control_points, &num_control_points)); |
602 | 146k | splines_.push_back(std::move(spline)); |
603 | 146k | } |
604 | | |
605 | 946 | JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState()); |
606 | | |
607 | 946 | if (!HasAny()) { |
608 | 0 | return JXL_FAILURE("Decoded splines but got none"); |
609 | 0 | } |
610 | | |
611 | 946 | return true; |
612 | 946 | } |
613 | | |
614 | | void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect, |
615 | 0 | const Rect& image_rect) const { |
616 | 0 | Apply</*add=*/true>(opsin, opsin_rect, image_rect); |
617 | 0 | } |
618 | | void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
619 | 11.4k | float* JXL_RESTRICT row_b, const Rect& image_row) const { |
620 | 11.4k | ApplyToRow</*add=*/true>(row_x, row_y, row_b, image_row); |
621 | 11.4k | } |
622 | | |
623 | 0 | void Splines::SubtractFrom(Image3F* const opsin) const { |
624 | 0 | Apply</*add=*/false>(opsin, Rect(*opsin), Rect(*opsin)); |
625 | 0 | } |
626 | | |
627 | | Status Splines::InitializeDrawCache(const size_t image_xsize, |
628 | | const size_t image_ysize, |
629 | 845 | const ColorCorrelation& color_correlation) { |
630 | | // TODO(veluca): avoid storing segments that are entirely outside image |
631 | | // boundaries. |
632 | 845 | segments_.clear(); |
633 | 845 | segment_indices_.clear(); |
634 | 845 | segment_y_start_.clear(); |
635 | 845 | std::vector<std::pair<size_t, size_t>> segments_by_y; |
636 | 845 | std::vector<Spline::Point> intermediate_points; |
637 | 845 | uint64_t total_estimated_area_reached = 0; |
638 | 845 | std::vector<Spline> splines; |
639 | 45.1k | for (size_t i = 0; i < splines_.size(); ++i) { |
640 | 44.3k | Spline spline; |
641 | 44.3k | JXL_RETURN_IF_ERROR(splines_[i].Dequantize( |
642 | 44.3k | starting_points_[i], quantization_adjustment_, |
643 | 44.3k | color_correlation.YtoXRatio(0), color_correlation.YtoBRatio(0), |
644 | 44.3k | image_xsize * image_ysize, &total_estimated_area_reached, spline)); |
645 | 44.2k | if (std::adjacent_find(spline.control_points.begin(), |
646 | 44.2k | spline.control_points.end()) != |
647 | 44.2k | spline.control_points.end()) { |
648 | | // Otherwise division by zero might occur. Once control points coincide, |
649 | | // the direction of curve is undefined... |
650 | 4 | return JXL_FAILURE( |
651 | 4 | "identical successive control points in spline %" PRIuS, i); |
652 | 4 | } |
653 | 44.2k | splines.push_back(spline); |
654 | 44.2k | } |
655 | | // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams. |
656 | 789 | if (total_estimated_area_reached > |
657 | 789 | std::min( |
658 | 789 | (8 * image_xsize * image_ysize + (static_cast<uint64_t>(1) << 25)), |
659 | 789 | (static_cast<uint64_t>(1) << 30))) { |
660 | 13 | JXL_WARNING( |
661 | 13 | "Large total_estimated_area_reached, expect slower decoding: %" PRIu64, |
662 | 13 | total_estimated_area_reached); |
663 | 13 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
664 | 13 | return JXL_FAILURE("Total spline area is too large"); |
665 | 13 | #endif |
666 | 13 | } |
667 | | |
668 | 44.1k | for (Spline& spline : splines) { |
669 | 44.1k | std::vector<std::pair<Spline::Point, float>> points_to_draw; |
670 | 18.8M | auto add_point = [&](const Spline::Point& point, const float multiplier) { |
671 | 18.8M | points_to_draw.emplace_back(point, multiplier); |
672 | 18.8M | }; |
673 | 44.1k | intermediate_points.clear(); |
674 | 44.1k | DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points); |
675 | 44.1k | ForEachEquallySpacedPoint(intermediate_points, add_point); |
676 | 44.1k | const float arc_length = |
677 | 44.1k | (points_to_draw.size() - 2) * kDesiredRenderingDistance + |
678 | 44.1k | points_to_draw.back().second; |
679 | 44.1k | if (arc_length <= 0.f) { |
680 | | // This spline wouldn't have any effect. |
681 | 40.9k | continue; |
682 | 40.9k | } |
683 | 3.25k | HWY_DYNAMIC_DISPATCH(SegmentsFromPoints) |
684 | 3.25k | (spline, points_to_draw, arc_length, segments_, segments_by_y); |
685 | 3.25k | } |
686 | | |
687 | | // TODO(eustas): consider linear sorting here. |
688 | 776 | std::sort(segments_by_y.begin(), segments_by_y.end()); |
689 | 776 | segment_indices_.resize(segments_by_y.size()); |
690 | 776 | segment_y_start_.resize(image_ysize + 1); |
691 | 83.8M | for (size_t i = 0; i < segments_by_y.size(); i++) { |
692 | 83.8M | segment_indices_[i] = segments_by_y[i].second; |
693 | 83.8M | size_t y = segments_by_y[i].first; |
694 | 83.8M | if (y < image_ysize) { |
695 | 64.9M | segment_y_start_[y + 1]++; |
696 | 64.9M | } |
697 | 83.8M | } |
698 | 90.7k | for (size_t y = 0; y < image_ysize; y++) { |
699 | 90.0k | segment_y_start_[y + 1] += segment_y_start_[y]; |
700 | 90.0k | } |
701 | 776 | return true; |
702 | 789 | } |
703 | | |
704 | | template <bool add> |
705 | | void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
706 | | float* JXL_RESTRICT row_b, |
707 | 11.4k | const Rect& image_row) const { |
708 | 11.4k | if (segments_.empty()) return; |
709 | 9.31k | JXL_ASSERT(image_row.ysize() == 1); |
710 | 18.6k | for (size_t iy = 0; iy < image_row.ysize(); iy++) { |
711 | 9.31k | HWY_DYNAMIC_DISPATCH(DrawSegments) |
712 | 9.31k | (row_x, row_y, row_b, image_row.Line(iy), add, segments_.data(), |
713 | 9.31k | segment_indices_.data(), segment_y_start_.data()); |
714 | 9.31k | } |
715 | 9.31k | } void jxl::Splines::ApplyToRow<true>(float*, float*, float*, jxl::RectT<unsigned long> const&) const Line | Count | Source | 707 | 11.4k | const Rect& image_row) const { | 708 | 11.4k | if (segments_.empty()) return; | 709 | 9.31k | JXL_ASSERT(image_row.ysize() == 1); | 710 | 18.6k | for (size_t iy = 0; iy < image_row.ysize(); iy++) { | 711 | 9.31k | HWY_DYNAMIC_DISPATCH(DrawSegments) | 712 | 9.31k | (row_x, row_y, row_b, image_row.Line(iy), add, segments_.data(), | 713 | 9.31k | segment_indices_.data(), segment_y_start_.data()); | 714 | 9.31k | } | 715 | 9.31k | } |
Unexecuted instantiation: void jxl::Splines::ApplyToRow<false>(float*, float*, float*, jxl::RectT<unsigned long> const&) const |
716 | | |
717 | | template <bool add> |
718 | | void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect, |
719 | 0 | const Rect& image_rect) const { |
720 | 0 | if (segments_.empty()) return; |
721 | 0 | for (size_t iy = 0; iy < image_rect.ysize(); iy++) { |
722 | 0 | const size_t y0 = opsin_rect.Line(iy).y0(); |
723 | 0 | const size_t x0 = opsin_rect.x0(); |
724 | 0 | ApplyToRow<add>(opsin->PlaneRow(0, y0) + x0, opsin->PlaneRow(1, y0) + x0, |
725 | 0 | opsin->PlaneRow(2, y0) + x0, image_rect.Line(iy)); |
726 | 0 | } |
727 | 0 | } Unexecuted instantiation: void jxl::Splines::Apply<true>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&, jxl::RectT<unsigned long> const&) const Unexecuted instantiation: void jxl::Splines::Apply<false>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&, jxl::RectT<unsigned long> const&) const |
728 | | |
729 | | } // namespace jxl |
730 | | #endif // HWY_ONCE |