/src/libjxl/lib/jxl/splines.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/splines.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | |
10 | | #include <algorithm> |
11 | | #include <cinttypes> // PRIu64 |
12 | | #include <cmath> |
13 | | #include <cstddef> |
14 | | #include <cstdint> |
15 | | #include <cstring> |
16 | | #include <limits> |
17 | | #include <utility> |
18 | | #include <vector> |
19 | | |
20 | | #include "lib/jxl/base/bits.h" |
21 | | #include "lib/jxl/base/common.h" |
22 | | #include "lib/jxl/base/compiler_specific.h" // ssize_t |
23 | | #include "lib/jxl/base/printf_macros.h" |
24 | | #include "lib/jxl/base/rect.h" |
25 | | #include "lib/jxl/base/status.h" |
26 | | #include "lib/jxl/chroma_from_luma.h" |
27 | | #include "lib/jxl/common.h" // JXL_HIGH_PRECISION |
28 | | #include "lib/jxl/dct_scales.h" |
29 | | #include "lib/jxl/dec_ans.h" |
30 | | #include "lib/jxl/dec_bit_reader.h" |
31 | | #include "lib/jxl/image.h" |
32 | | #include "lib/jxl/pack_signed.h" |
33 | | |
34 | | #undef HWY_TARGET_INCLUDE |
35 | | #define HWY_TARGET_INCLUDE "lib/jxl/splines.cc" |
36 | | #include <hwy/foreach_target.h> |
37 | | #include <hwy/highway.h> |
38 | | |
39 | | #include "lib/jxl/base/fast_math-inl.h" |
40 | | HWY_BEFORE_NAMESPACE(); |
41 | | namespace jxl { |
42 | | namespace HWY_NAMESPACE { |
43 | | namespace { |
44 | | |
45 | | // These templates are not found via ADL. |
46 | | using hwy::HWY_NAMESPACE::Mul; |
47 | | using hwy::HWY_NAMESPACE::MulAdd; |
48 | | using hwy::HWY_NAMESPACE::MulSub; |
49 | | using hwy::HWY_NAMESPACE::Sqrt; |
50 | | using hwy::HWY_NAMESPACE::Sub; |
51 | | |
52 | | // Given a set of DCT coefficients, this returns the result of performing cosine |
53 | | // interpolation on the original samples. |
54 | 48.1M | float ContinuousIDCT(const Dct32& dct, const float t) { |
55 | | // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of |
56 | | // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces |
57 | | // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform |
58 | | // treatment of all the coefficients. |
59 | 48.1M | constexpr float kMultipliers[32] = { |
60 | 48.1M | kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, |
61 | 48.1M | kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, |
62 | 48.1M | kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, |
63 | 48.1M | kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, |
64 | 48.1M | kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, |
65 | 48.1M | kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, |
66 | 48.1M | kPi / 32 * 30, kPi / 32 * 31, |
67 | 48.1M | }; |
68 | 48.1M | HWY_CAPPED(float, 32) df; |
69 | 48.1M | auto result = Zero(df); |
70 | 48.1M | const auto tandhalf = Set(df, t + 0.5f); |
71 | 240M | for (int i = 0; i < 32; i += Lanes(df)) { |
72 | 192M | auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); |
73 | 192M | auto cos = FastCosf(df, cos_arg); |
74 | 192M | auto local_res = Mul(LoadU(df, dct.data() + i), cos); |
75 | 192M | result = MulAdd(Set(df, kSqrt2), local_res, result); |
76 | 192M | } |
77 | 48.1M | return GetLane(SumOfLanes(df, result)); |
78 | 48.1M | } splines.cc:jxl::N_AVX2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float) Line | Count | Source | 54 | 48.1M | float ContinuousIDCT(const Dct32& dct, const float t) { | 55 | | // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of | 56 | | // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces | 57 | | // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform | 58 | | // treatment of all the coefficients. | 59 | 48.1M | constexpr float kMultipliers[32] = { | 60 | 48.1M | kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, | 61 | 48.1M | kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, | 62 | 48.1M | kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, | 63 | 48.1M | kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, | 64 | 48.1M | kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, | 65 | 48.1M | kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, | 66 | 48.1M | kPi / 32 * 30, kPi / 32 * 31, | 67 | 48.1M | }; | 68 | 48.1M | HWY_CAPPED(float, 32) df; | 69 | 48.1M | auto result = Zero(df); | 70 | 48.1M | const auto tandhalf = Set(df, t + 0.5f); | 71 | 240M | for (int i = 0; i < 32; i += Lanes(df)) { | 72 | 192M | auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); | 73 | 192M | auto cos = FastCosf(df, cos_arg); | 74 | 192M | auto local_res = Mul(LoadU(df, dct.data() + i), cos); | 75 | 192M | result = MulAdd(Set(df, kSqrt2), local_res, result); | 76 | 192M | } | 77 | 48.1M | return GetLane(SumOfLanes(df, result)); | 78 | 48.1M | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float) |
79 | | |
80 | | template <typename DF> |
81 | | void DrawSegment(DF df, const SplineSegment& segment, const bool add, |
82 | | const size_t y, const size_t x, const size_t x0, |
83 | 708k | float* JXL_RESTRICT rows[3]) { |
84 | 708k | Rebind<int32_t, DF> di; |
85 | 708k | const auto inv_sigma = Set(df, segment.inv_sigma); |
86 | 708k | const auto half = Set(df, 0.5f); |
87 | 708k | const auto one_over_2s2 = Set(df, 0.353553391f); |
88 | 708k | const auto sigma_over_4_times_intensity = |
89 | 708k | Set(df, segment.sigma_over_4_times_intensity); |
90 | 708k | const auto dx = |
91 | 708k | Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x)); |
92 | 708k | const auto dy = Set(df, y - segment.center_y); |
93 | 708k | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); |
94 | 708k | const auto distance = Sqrt(sqd); |
95 | 708k | const auto one_dimensional_factor = |
96 | 708k | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), |
97 | 708k | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); |
98 | 708k | auto local_intensity = |
99 | 708k | Mul(sigma_over_4_times_intensity, |
100 | 708k | Mul(one_dimensional_factor, one_dimensional_factor)); |
101 | 2.83M | for (size_t c = 0; c < 3; ++c) { |
102 | 2.12M | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); |
103 | 2.12M | const auto in = LoadU(df, rows[c] + x); |
104 | 2.12M | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); |
105 | 2.12M | } |
106 | 708k | } splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 83 | 325k | float* JXL_RESTRICT rows[3]) { | 84 | 325k | Rebind<int32_t, DF> di; | 85 | 325k | const auto inv_sigma = Set(df, segment.inv_sigma); | 86 | 325k | const auto half = Set(df, 0.5f); | 87 | 325k | const auto one_over_2s2 = Set(df, 0.353553391f); | 88 | 325k | const auto sigma_over_4_times_intensity = | 89 | 325k | Set(df, segment.sigma_over_4_times_intensity); | 90 | 325k | const auto dx = | 91 | 325k | Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x)); | 92 | 325k | const auto dy = Set(df, y - segment.center_y); | 93 | 325k | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 94 | 325k | const auto distance = Sqrt(sqd); | 95 | 325k | const auto one_dimensional_factor = | 96 | 325k | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 97 | 325k | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 98 | 325k | auto local_intensity = | 99 | 325k | Mul(sigma_over_4_times_intensity, | 100 | 325k | Mul(one_dimensional_factor, one_dimensional_factor)); | 101 | 1.30M | for (size_t c = 0; c < 3; ++c) { | 102 | 975k | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 103 | 975k | const auto in = LoadU(df, rows[c] + x); | 104 | 975k | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 105 | 975k | } | 106 | 325k | } |
splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 83 | 383k | float* JXL_RESTRICT rows[3]) { | 84 | 383k | Rebind<int32_t, DF> di; | 85 | 383k | const auto inv_sigma = Set(df, segment.inv_sigma); | 86 | 383k | const auto half = Set(df, 0.5f); | 87 | 383k | const auto one_over_2s2 = Set(df, 0.353553391f); | 88 | 383k | const auto sigma_over_4_times_intensity = | 89 | 383k | Set(df, segment.sigma_over_4_times_intensity); | 90 | 383k | const auto dx = | 91 | 383k | Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x)); | 92 | 383k | const auto dy = Set(df, y - segment.center_y); | 93 | 383k | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 94 | 383k | const auto distance = Sqrt(sqd); | 95 | 383k | const auto one_dimensional_factor = | 96 | 383k | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 97 | 383k | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 98 | 383k | auto local_intensity = | 99 | 383k | Mul(sigma_over_4_times_intensity, | 100 | 383k | Mul(one_dimensional_factor, one_dimensional_factor)); | 101 | 1.53M | for (size_t c = 0; c < 3; ++c) { | 102 | 1.14M | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 103 | 1.14M | const auto in = LoadU(df, rows[c] + x); | 104 | 1.14M | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 105 | 1.14M | } | 106 | 383k | } |
Unexecuted instantiation: splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Unexecuted instantiation: splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Unexecuted instantiation: splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Unexecuted instantiation: splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) |
107 | | |
108 | | void DrawSegment(const SplineSegment& segment, const bool add, const size_t y, |
109 | | const size_t x0, const size_t x1, |
110 | 235k | float* JXL_RESTRICT rows[3]) { |
111 | 235k | ssize_t start = std::llround(segment.center_x - segment.maximum_distance); |
112 | 235k | ssize_t end = std::llround(segment.center_x + segment.maximum_distance); |
113 | 235k | if (end < static_cast<ssize_t>(x0) || start >= static_cast<ssize_t>(x1)) { |
114 | 121k | return; // span does not intersect scan |
115 | 121k | } |
116 | 113k | size_t span_x0 = std::max<ssize_t>(x0, start) - x0; |
117 | 113k | size_t span_x1 = std::min<ssize_t>(x1, end + 1) - x0; // exclusive |
118 | 113k | HWY_FULL(float) df; |
119 | 113k | size_t x = span_x0; |
120 | 438k | for (; x + Lanes(df) <= span_x1; x += Lanes(df)) { |
121 | 325k | DrawSegment(df, segment, add, y, x, x0, rows); |
122 | 325k | } |
123 | 496k | for (; x < span_x1; ++x) { |
124 | 383k | DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows); |
125 | 383k | } |
126 | 113k | } splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 110 | 235k | float* JXL_RESTRICT rows[3]) { | 111 | 235k | ssize_t start = std::llround(segment.center_x - segment.maximum_distance); | 112 | 235k | ssize_t end = std::llround(segment.center_x + segment.maximum_distance); | 113 | 235k | if (end < static_cast<ssize_t>(x0) || start >= static_cast<ssize_t>(x1)) { | 114 | 121k | return; // span does not intersect scan | 115 | 121k | } | 116 | 113k | size_t span_x0 = std::max<ssize_t>(x0, start) - x0; | 117 | 113k | size_t span_x1 = std::min<ssize_t>(x1, end + 1) - x0; // exclusive | 118 | 113k | HWY_FULL(float) df; | 119 | 113k | size_t x = span_x0; | 120 | 438k | for (; x + Lanes(df) <= span_x1; x += Lanes(df)) { | 121 | 325k | DrawSegment(df, segment, add, y, x, x0, rows); | 122 | 325k | } | 123 | 496k | for (; x < span_x1; ++x) { | 124 | 383k | DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows); | 125 | 383k | } | 126 | 113k | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) |
127 | | |
128 | | void ComputeSegments(const Spline::Point& center, const float intensity, |
129 | | const float color[3], const float sigma, |
130 | | std::vector<SplineSegment>& segments, |
131 | 12.0M | std::vector<std::pair<size_t, size_t>>& segments_by_y) { |
132 | | // Sanity check sigma, inverse sigma and intensity |
133 | 12.0M | if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && |
134 | 12.0M | std::isfinite(intensity))) { |
135 | 763 | return; |
136 | 763 | } |
137 | 12.0M | #if JXL_HIGH_PRECISION |
138 | 12.0M | constexpr float kDistanceExp = 5; |
139 | | #else |
140 | | // About 30% faster. |
141 | | constexpr float kDistanceExp = 3; |
142 | | #endif |
143 | | // We cap from below colors to at least 0.01. |
144 | 12.0M | float max_color = 0.01f; |
145 | 48.1M | for (size_t c = 0; c < 3; c++) { |
146 | 36.1M | max_color = std::max(max_color, std::abs(color[c] * intensity)); |
147 | 36.1M | } |
148 | | // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops |
149 | | // below 10^-kDistanceExp. |
150 | 12.0M | const float maximum_distance = |
151 | 12.0M | std::sqrt(-2 * sigma * sigma * |
152 | 12.0M | (std::log(0.1) * kDistanceExp - std::log(max_color))); |
153 | 12.0M | SplineSegment segment; |
154 | 12.0M | segment.center_y = center.y; |
155 | 12.0M | segment.center_x = center.x; |
156 | 12.0M | memcpy(segment.color, color, sizeof(segment.color)); |
157 | 12.0M | segment.inv_sigma = 1.0f / sigma; |
158 | 12.0M | segment.sigma_over_4_times_intensity = .25f * sigma * intensity; |
159 | 12.0M | segment.maximum_distance = maximum_distance; |
160 | 12.0M | ssize_t y0 = std::llround(center.y - maximum_distance); |
161 | 12.0M | ssize_t y1 = |
162 | 12.0M | std::llround(center.y + maximum_distance) + 1; // one-past-the-end |
163 | 47.6M | for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) { |
164 | 35.6M | segments_by_y.emplace_back(y, segments.size()); |
165 | 35.6M | } |
166 | 12.0M | segments.push_back(segment); |
167 | 12.0M | } splines.cc:jxl::N_AVX2::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Line | Count | Source | 131 | 12.0M | std::vector<std::pair<size_t, size_t>>& segments_by_y) { | 132 | | // Sanity check sigma, inverse sigma and intensity | 133 | 12.0M | if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && | 134 | 12.0M | std::isfinite(intensity))) { | 135 | 763 | return; | 136 | 763 | } | 137 | 12.0M | #if JXL_HIGH_PRECISION | 138 | 12.0M | constexpr float kDistanceExp = 5; | 139 | | #else | 140 | | // About 30% faster. | 141 | | constexpr float kDistanceExp = 3; | 142 | | #endif | 143 | | // We cap from below colors to at least 0.01. | 144 | 12.0M | float max_color = 0.01f; | 145 | 48.1M | for (size_t c = 0; c < 3; c++) { | 146 | 36.1M | max_color = std::max(max_color, std::abs(color[c] * intensity)); | 147 | 36.1M | } | 148 | | // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops | 149 | | // below 10^-kDistanceExp. | 150 | 12.0M | const float maximum_distance = | 151 | 12.0M | std::sqrt(-2 * sigma * sigma * | 152 | 12.0M | (std::log(0.1) * kDistanceExp - std::log(max_color))); | 153 | 12.0M | SplineSegment segment; | 154 | 12.0M | segment.center_y = center.y; | 155 | 12.0M | segment.center_x = center.x; | 156 | 12.0M | memcpy(segment.color, color, sizeof(segment.color)); | 157 | 12.0M | segment.inv_sigma = 1.0f / sigma; | 158 | 12.0M | segment.sigma_over_4_times_intensity = .25f * sigma * intensity; | 159 | 12.0M | segment.maximum_distance = maximum_distance; | 160 | 12.0M | ssize_t y0 = std::llround(center.y - maximum_distance); | 161 | 12.0M | ssize_t y1 = | 162 | 12.0M | std::llround(center.y + maximum_distance) + 1; // one-past-the-end | 163 | 47.6M | for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) { | 164 | 35.6M | segments_by_y.emplace_back(y, segments.size()); | 165 | 35.6M | } | 166 | 12.0M | segments.push_back(segment); | 167 | 12.0M | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::ComputeSegments(jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) |
168 | | |
169 | | void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
170 | | float* JXL_RESTRICT row_b, size_t y, size_t x0, size_t x1, |
171 | | const bool add, const SplineSegment* segments, |
172 | | const size_t* segment_indices, |
173 | 27.5k | const size_t* segment_y_start) { |
174 | 27.5k | float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b}; |
175 | 262k | for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { |
176 | 235k | DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows); |
177 | 235k | } |
178 | 27.5k | } splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*) Line | Count | Source | 173 | 27.5k | const size_t* segment_y_start) { | 174 | 27.5k | float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b}; | 175 | 262k | for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { | 176 | 235k | DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows); | 177 | 235k | } | 178 | 27.5k | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, unsigned long const*) |
179 | | |
180 | | void SegmentsFromPoints( |
181 | | const Spline& spline, |
182 | | const std::vector<std::pair<Spline::Point, float>>& points_to_draw, |
183 | | const float arc_length, std::vector<SplineSegment>& segments, |
184 | 3.40k | std::vector<std::pair<size_t, size_t>>& segments_by_y) { |
185 | 3.40k | const float inv_arc_length = 1.0f / arc_length; |
186 | 3.40k | int k = 0; |
187 | 12.0M | for (const auto& point_to_draw : points_to_draw) { |
188 | 12.0M | const Spline::Point& point = point_to_draw.first; |
189 | 12.0M | const float multiplier = point_to_draw.second; |
190 | 12.0M | const float progress_along_arc = |
191 | 12.0M | std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); |
192 | 12.0M | ++k; |
193 | 12.0M | float color[3]; |
194 | 48.1M | for (size_t c = 0; c < 3; ++c) { |
195 | 36.1M | color[c] = |
196 | 36.1M | ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); |
197 | 36.1M | } |
198 | 12.0M | const float sigma = |
199 | 12.0M | ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); |
200 | 12.0M | ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y); |
201 | 12.0M | } |
202 | 3.40k | } splines.cc:jxl::N_AVX2::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Line | Count | Source | 184 | 3.40k | std::vector<std::pair<size_t, size_t>>& segments_by_y) { | 185 | 3.40k | const float inv_arc_length = 1.0f / arc_length; | 186 | 3.40k | int k = 0; | 187 | 12.0M | for (const auto& point_to_draw : points_to_draw) { | 188 | 12.0M | const Spline::Point& point = point_to_draw.first; | 189 | 12.0M | const float multiplier = point_to_draw.second; | 190 | 12.0M | const float progress_along_arc = | 191 | 12.0M | std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); | 192 | 12.0M | ++k; | 193 | 12.0M | float color[3]; | 194 | 48.1M | for (size_t c = 0; c < 3; ++c) { | 195 | 36.1M | color[c] = | 196 | 36.1M | ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); | 197 | 36.1M | } | 198 | 12.0M | const float sigma = | 199 | 12.0M | ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); | 200 | 12.0M | ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y); | 201 | 12.0M | } | 202 | 3.40k | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::SegmentsFromPoints(jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<std::__1::pair<unsigned long, unsigned long>, std::__1::allocator<std::__1::pair<unsigned long, unsigned long> > >&) |
203 | | } // namespace |
204 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
205 | | } // namespace HWY_NAMESPACE |
206 | | } // namespace jxl |
207 | | HWY_AFTER_NAMESPACE(); |
208 | | |
209 | | #if HWY_ONCE |
210 | | namespace jxl { |
211 | | HWY_EXPORT(SegmentsFromPoints); |
212 | | HWY_EXPORT(DrawSegments); |
213 | | |
214 | | namespace { |
215 | | |
216 | | // It is not in spec, but reasonable limit to avoid overflows. |
217 | | template <typename T> |
218 | 261k | Status ValidateSplinePointPos(const T& x, const T& y) { |
219 | 261k | constexpr T kSplinePosLimit = 1u << 23; |
220 | 261k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || |
221 | 261k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { |
222 | 40 | return JXL_FAILURE("Spline coordinates out of bounds"); |
223 | 40 | } |
224 | 261k | return true; |
225 | 261k | } splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<long>(long const&, long const&) Line | Count | Source | 218 | 54.0k | Status ValidateSplinePointPos(const T& x, const T& y) { | 219 | 54.0k | constexpr T kSplinePosLimit = 1u << 23; | 220 | 54.0k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 221 | 54.0k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 222 | 31 | return JXL_FAILURE("Spline coordinates out of bounds"); | 223 | 31 | } | 224 | 53.9k | return true; | 225 | 54.0k | } |
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<float>(float const&, float const&) Line | Count | Source | 218 | 7.83k | Status ValidateSplinePointPos(const T& x, const T& y) { | 219 | 7.83k | constexpr T kSplinePosLimit = 1u << 23; | 220 | 7.83k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 221 | 7.83k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 222 | 0 | return JXL_FAILURE("Spline coordinates out of bounds"); | 223 | 0 | } | 224 | 7.83k | return true; | 225 | 7.83k | } |
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<int>(int const&, int const&) Line | Count | Source | 218 | 199k | Status ValidateSplinePointPos(const T& x, const T& y) { | 219 | 199k | constexpr T kSplinePosLimit = 1u << 23; | 220 | 199k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 221 | 199k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 222 | 9 | return JXL_FAILURE("Spline coordinates out of bounds"); | 223 | 9 | } | 224 | 199k | return true; | 225 | 199k | } |
|
226 | | |
227 | | // Maximum number of spline control points per frame is |
228 | | // std::min(kMaxNumControlPoints, xsize * ysize / 2) |
229 | | constexpr size_t kMaxNumControlPoints = 1u << 20u; |
230 | | constexpr size_t kMaxNumControlPointsPerPixelRatio = 2; |
231 | | |
232 | 0 | float AdjustedQuant(const int32_t adjustment) { |
233 | 0 | return (adjustment >= 0) ? (1.f + .125f * adjustment) |
234 | 0 | : 1.f / (1.f - .125f * adjustment); |
235 | 0 | } |
236 | | |
237 | 7.82k | float InvAdjustedQuant(const int32_t adjustment) { |
238 | 7.82k | return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment) |
239 | 7.82k | : (1.f - .125f * adjustment); |
240 | 7.82k | } |
241 | | |
242 | | // X, Y, B, sigma. |
243 | | constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f}; |
244 | | |
245 | | Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points, |
246 | | BitReader* const br, ANSSymbolReader* reader, |
247 | | const std::vector<uint8_t>& context_map, |
248 | 3.43k | const size_t num_splines) { |
249 | 3.43k | points->clear(); |
250 | 3.43k | points->reserve(num_splines); |
251 | 3.43k | int64_t last_x = 0; |
252 | 3.43k | int64_t last_y = 0; |
253 | 57.4k | for (size_t i = 0; i < num_splines; i++) { |
254 | 54.0k | size_t dx = |
255 | 54.0k | reader->ReadHybridUint(kStartingPositionContext, br, context_map); |
256 | 54.0k | size_t dy = |
257 | 54.0k | reader->ReadHybridUint(kStartingPositionContext, br, context_map); |
258 | 54.0k | int64_t x; |
259 | 54.0k | int64_t y; |
260 | 54.0k | if (i != 0) { |
261 | 50.5k | x = UnpackSigned(dx) + last_x; |
262 | 50.5k | y = UnpackSigned(dy) + last_y; |
263 | 50.5k | } else { |
264 | 3.43k | x = dx; |
265 | 3.43k | y = dy; |
266 | 3.43k | } |
267 | 54.0k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y)); |
268 | 53.9k | points->emplace_back(static_cast<float>(x), static_cast<float>(y)); |
269 | 53.9k | last_x = x; |
270 | 53.9k | last_y = y; |
271 | 53.9k | } |
272 | 3.40k | return true; |
273 | 3.43k | } |
274 | | |
275 | | struct Vector { |
276 | | float x, y; |
277 | 0 | Vector operator-() const { return {-x, -y}; } |
278 | 0 | Vector operator+(const Vector& other) const { |
279 | 0 | return {x + other.x, y + other.y}; |
280 | 0 | } |
281 | 13.4M | float SquaredNorm() const { return x * x + y * y; } |
282 | | }; |
283 | 19.7M | Vector operator*(const float k, const Vector& vec) { |
284 | 19.7M | return {k * vec.x, k * vec.y}; |
285 | 19.7M | } |
286 | | |
287 | 19.8M | Spline::Point operator+(const Spline::Point& p, const Vector& vec) { |
288 | 19.8M | return {p.x + vec.x, p.y + vec.y}; |
289 | 19.8M | } |
290 | 33.2M | Vector operator-(const Spline::Point& a, const Spline::Point& b) { |
291 | 33.2M | return {a.x - b.x, a.y - b.y}; |
292 | 33.2M | } |
293 | | |
294 | | // TODO(eustas): avoid making a copy of "points". |
295 | | void DrawCentripetalCatmullRomSpline(std::vector<Spline::Point> points, |
296 | 7.61k | std::vector<Spline::Point>& result) { |
297 | 7.61k | if (points.empty()) return; |
298 | 7.61k | if (points.size() == 1) { |
299 | 4.20k | result.push_back(points[0]); |
300 | 4.20k | return; |
301 | 4.20k | } |
302 | | // Number of points to compute between each control point. |
303 | 3.40k | static constexpr int kNumPoints = 16; |
304 | 3.40k | result.reserve((points.size() - 1) * kNumPoints + 1); |
305 | 3.40k | points.insert(points.begin(), points[0] + (points[0] - points[1])); |
306 | 3.40k | points.push_back(points[points.size() - 1] + |
307 | 3.40k | (points[points.size() - 1] - points[points.size() - 2])); |
308 | | // points has at least 4 elements at this point. |
309 | 89.6k | for (size_t start = 0; start < points.size() - 3; ++start) { |
310 | | // 4 of them are used, and we draw from p[1] to p[2]. |
311 | 86.2k | const Spline::Point* const p = &points[start]; |
312 | 86.2k | result.push_back(p[1]); |
313 | 86.2k | float d[3]; |
314 | 86.2k | float t[4]; |
315 | 86.2k | t[0] = 0; |
316 | 344k | for (int k = 0; k < 3; ++k) { |
317 | | // TODO(eustas): for each segment delta is calculated 3 times... |
318 | | // TODO(eustas): restrict d[k] with reasonable limit and spec it. |
319 | 258k | d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y)); |
320 | 258k | t[k + 1] = t[k] + d[k]; |
321 | 258k | } |
322 | 1.37M | for (int i = 1; i < kNumPoints; ++i) { |
323 | 1.29M | const float tt = d[0] + (static_cast<float>(i) / kNumPoints) * d[1]; |
324 | 1.29M | Spline::Point a[3]; |
325 | 5.17M | for (int k = 0; k < 3; ++k) { |
326 | | // TODO(eustas): reciprocal multiplication would be faster. |
327 | 3.87M | a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]); |
328 | 3.87M | } |
329 | 1.29M | Spline::Point b[2]; |
330 | 3.87M | for (int k = 0; k < 2; ++k) { |
331 | 2.58M | b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]); |
332 | 2.58M | } |
333 | 1.29M | result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0])); |
334 | 1.29M | } |
335 | 86.2k | } |
336 | 3.40k | result.push_back(points[points.size() - 2]); |
337 | 3.40k | } |
338 | | |
339 | | // Move along the line segments defined by `points`, `kDesiredRenderingDistance` |
340 | | // pixels at a time, and call `functor` with each point and the actual distance |
341 | | // to the previous point (which will always be kDesiredRenderingDistance except |
342 | | // possibly for the very last point). |
343 | | // TODO(eustas): this method always adds the last point, but never the first |
344 | | // (unless those are one); I believe both ends matter. |
345 | | template <typename Points, typename Functor> |
346 | 7.61k | Status ForEachEquallySpacedPoint(const Points& points, const Functor& functor) { |
347 | 7.61k | JXL_ENSURE(!points.empty()); |
348 | 7.61k | Spline::Point current = points.front(); |
349 | 7.61k | functor(current, kDesiredRenderingDistance); |
350 | 7.61k | auto next = points.begin(); |
351 | 12.0M | while (next != points.end()) { |
352 | 12.0M | const Spline::Point* previous = ¤t; |
353 | 12.0M | float arclength_from_previous = 0.f; |
354 | 13.4M | for (;;) { |
355 | 13.4M | if (next == points.end()) { |
356 | 7.61k | functor(*previous, arclength_from_previous); |
357 | 7.61k | return true; |
358 | 7.61k | } |
359 | 13.4M | const float arclength_to_next = |
360 | 13.4M | std::sqrt((*next - *previous).SquaredNorm()); |
361 | 13.4M | if (arclength_from_previous + arclength_to_next >= |
362 | 13.4M | kDesiredRenderingDistance) { |
363 | 12.0M | current = |
364 | 12.0M | *previous + ((kDesiredRenderingDistance - arclength_from_previous) / |
365 | 12.0M | arclength_to_next) * |
366 | 12.0M | (*next - *previous); |
367 | 12.0M | functor(current, kDesiredRenderingDistance); |
368 | 12.0M | break; |
369 | 12.0M | } |
370 | 1.38M | arclength_from_previous += arclength_to_next; |
371 | 1.38M | previous = &*next; |
372 | 1.38M | ++next; |
373 | 1.38M | } |
374 | 12.0M | } |
375 | 0 | return true; |
376 | 7.61k | } |
377 | | |
378 | | } // namespace |
379 | | |
380 | | StatusOr<QuantizedSpline> QuantizedSpline::Create( |
381 | | const Spline& original, const int32_t quantization_adjustment, |
382 | 0 | const float y_to_x, const float y_to_b) { |
383 | 0 | JXL_ENSURE(!original.control_points.empty()); |
384 | 0 | QuantizedSpline result; |
385 | 0 | result.control_points_.reserve(original.control_points.size() - 1); |
386 | 0 | const Spline::Point& starting_point = original.control_points.front(); |
387 | 0 | int previous_x = static_cast<int>(std::round(starting_point.x)); |
388 | 0 | int previous_y = static_cast<int>(std::round(starting_point.y)); |
389 | 0 | int previous_delta_x = 0; |
390 | 0 | int previous_delta_y = 0; |
391 | 0 | for (auto it = original.control_points.begin() + 1; |
392 | 0 | it != original.control_points.end(); ++it) { |
393 | 0 | const int new_x = static_cast<int>(std::round(it->x)); |
394 | 0 | const int new_y = static_cast<int>(std::round(it->y)); |
395 | 0 | const int new_delta_x = new_x - previous_x; |
396 | 0 | const int new_delta_y = new_y - previous_y; |
397 | 0 | result.control_points_.emplace_back(new_delta_x - previous_delta_x, |
398 | 0 | new_delta_y - previous_delta_y); |
399 | 0 | previous_delta_x = new_delta_x; |
400 | 0 | previous_delta_y = new_delta_y; |
401 | 0 | previous_x = new_x; |
402 | 0 | previous_y = new_y; |
403 | 0 | } |
404 | |
|
405 | 0 | const auto to_int = [](float v) -> int { |
406 | | // Maximal int representable with float. |
407 | 0 | constexpr float kMax = std::numeric_limits<int>::max() - 127; |
408 | 0 | constexpr float kMin = -kMax; |
409 | 0 | return static_cast<int>(std::round(Clamp1(v, kMin, kMax))); |
410 | 0 | }; |
411 | |
|
412 | 0 | const auto quant = AdjustedQuant(quantization_adjustment); |
413 | 0 | const auto inv_quant = InvAdjustedQuant(quantization_adjustment); |
414 | 0 | for (int c : {1, 0, 2}) { |
415 | 0 | float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b; |
416 | 0 | for (int i = 0; i < 32; ++i) { |
417 | 0 | const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; |
418 | 0 | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
419 | 0 | auto restored_y = result.color_dct_[1][i] * inv_dct_factor * |
420 | 0 | kChannelWeight[1] * inv_quant; |
421 | 0 | auto decorrelated = original.color_dct[c][i] - factor * restored_y; |
422 | 0 | result.color_dct_[c][i] = |
423 | 0 | to_int(decorrelated * dct_factor * quant / kChannelWeight[c]); |
424 | 0 | } |
425 | 0 | } |
426 | 0 | for (int i = 0; i < 32; ++i) { |
427 | 0 | const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; |
428 | 0 | result.sigma_dct_[i] = |
429 | 0 | to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]); |
430 | 0 | } |
431 | 0 | return result; |
432 | 0 | } |
433 | | |
434 | | Status QuantizedSpline::Dequantize(const Spline::Point& starting_point, |
435 | | const int32_t quantization_adjustment, |
436 | | const float y_to_x, const float y_to_b, |
437 | | const uint64_t image_size, |
438 | | uint64_t* total_estimated_area_reached, |
439 | 7.83k | Spline& result) const { |
440 | 7.83k | constexpr uint64_t kOne = static_cast<uint64_t>(1); |
441 | 7.83k | const uint64_t area_limit = |
442 | 7.83k | std::min(1024 * image_size + (kOne << 32), kOne << 42); |
443 | | |
444 | 7.83k | result.control_points.clear(); |
445 | 7.83k | result.control_points.reserve(control_points_.size() + 1); |
446 | 7.83k | float px = std::round(starting_point.x); |
447 | 7.83k | float py = std::round(starting_point.y); |
448 | 7.83k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py)); |
449 | 7.83k | int current_x = static_cast<int>(px); |
450 | 7.83k | int current_y = static_cast<int>(py); |
451 | 7.83k | result.control_points.emplace_back(static_cast<float>(current_x), |
452 | 7.83k | static_cast<float>(current_y)); |
453 | 7.83k | int current_delta_x = 0; |
454 | 7.83k | int current_delta_y = 0; |
455 | 7.83k | uint64_t manhattan_distance = 0; |
456 | 99.6k | for (const auto& point : control_points_) { |
457 | 99.6k | current_delta_x += point.first; |
458 | 99.6k | current_delta_y += point.second; |
459 | 99.6k | manhattan_distance += std::abs(current_delta_x) + std::abs(current_delta_y); |
460 | 99.6k | if (manhattan_distance > area_limit) { |
461 | 0 | return JXL_FAILURE("Too large manhattan_distance reached: %" PRIu64, |
462 | 0 | manhattan_distance); |
463 | 0 | } |
464 | 99.6k | JXL_RETURN_IF_ERROR( |
465 | 99.6k | ValidateSplinePointPos(current_delta_x, current_delta_y)); |
466 | 99.6k | current_x += current_delta_x; |
467 | 99.6k | current_y += current_delta_y; |
468 | 99.6k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y)); |
469 | 99.6k | result.control_points.emplace_back(static_cast<float>(current_x), |
470 | 99.6k | static_cast<float>(current_y)); |
471 | 99.6k | } |
472 | | |
473 | 7.82k | const auto inv_quant = InvAdjustedQuant(quantization_adjustment); |
474 | 31.2k | for (int c = 0; c < 3; ++c) { |
475 | 774k | for (int i = 0; i < 32; ++i) { |
476 | 750k | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
477 | 750k | result.color_dct[c][i] = |
478 | 750k | color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant; |
479 | 750k | } |
480 | 23.4k | } |
481 | 258k | for (int i = 0; i < 32; ++i) { |
482 | 250k | result.color_dct[0][i] += y_to_x * result.color_dct[1][i]; |
483 | 250k | result.color_dct[2][i] += y_to_b * result.color_dct[1][i]; |
484 | 250k | } |
485 | 7.82k | uint64_t width_estimate = 0; |
486 | | |
487 | 7.82k | uint64_t color[3] = {}; |
488 | 31.2k | for (int c = 0; c < 3; ++c) { |
489 | 774k | for (int i = 0; i < 32; ++i) { |
490 | 750k | color[c] += static_cast<uint64_t>( |
491 | 750k | std::ceil(inv_quant * std::abs(color_dct_[c][i]))); |
492 | 750k | } |
493 | 23.4k | } |
494 | 7.82k | color[0] += static_cast<uint64_t>(std::ceil(std::abs(y_to_x))) * color[1]; |
495 | 7.82k | color[2] += static_cast<uint64_t>(std::ceil(std::abs(y_to_b))) * color[1]; |
496 | | // This is not taking kChannelWeight into account, but up to constant factors |
497 | | // it gives an indication of the influence of the color values on the area |
498 | | // that will need to be rendered. |
499 | 7.82k | const uint64_t max_color = std::max({color[1], color[0], color[2]}); |
500 | 7.82k | uint64_t logcolor = |
501 | 7.82k | std::max(kOne, static_cast<uint64_t>(CeilLog2Nonzero(kOne + max_color))); |
502 | | |
503 | 7.82k | const float weight_limit = |
504 | 7.82k | std::ceil(std::sqrt((static_cast<float>(area_limit) / logcolor) / |
505 | 7.82k | std::max<size_t>(1, manhattan_distance))); |
506 | | |
507 | 258k | for (int i = 0; i < 32; ++i) { |
508 | 250k | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
509 | 250k | result.sigma_dct[i] = |
510 | 250k | sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant; |
511 | | // If we include the factor kChannelWeight[3]=.3333f here, we get a |
512 | | // realistic area estimate. We leave it out to simplify the calculations, |
513 | | // and understand that this way we underestimate the area by a factor of |
514 | | // 1/(0.3333*0.3333). This is taken into account in the limits below. |
515 | 250k | float weight_f = std::ceil(inv_quant * std::abs(sigma_dct_[i])); |
516 | 250k | uint64_t weight = |
517 | 250k | static_cast<uint64_t>(std::min(weight_limit, std::max(1.0f, weight_f))); |
518 | 250k | width_estimate += weight * weight * logcolor; |
519 | 250k | } |
520 | 7.82k | *total_estimated_area_reached += (width_estimate * manhattan_distance); |
521 | 7.82k | if (*total_estimated_area_reached > area_limit) { |
522 | 14 | return JXL_FAILURE("Too large total_estimated_area eached: %" PRIu64, |
523 | 14 | *total_estimated_area_reached); |
524 | 14 | } |
525 | | |
526 | 7.80k | return true; |
527 | 7.82k | } |
528 | | |
529 | | Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map, |
530 | | ANSSymbolReader* const decoder, |
531 | | BitReader* const br, |
532 | | const size_t max_control_points, |
533 | 13.7k | size_t* total_num_control_points) { |
534 | 13.7k | const size_t num_control_points = |
535 | 13.7k | decoder->ReadHybridUint(kNumControlPointsContext, br, context_map); |
536 | 13.7k | if (num_control_points > max_control_points) { |
537 | 5 | return JXL_FAILURE("Too many control points: %" PRIuS, num_control_points); |
538 | 5 | } |
539 | 13.7k | *total_num_control_points += num_control_points; |
540 | 13.7k | if (*total_num_control_points > max_control_points) { |
541 | 16 | return JXL_FAILURE("Too many control points: %" PRIuS, |
542 | 16 | *total_num_control_points); |
543 | 16 | } |
544 | 13.7k | control_points_.resize(num_control_points); |
545 | | // Maximal image dimension. |
546 | 13.7k | constexpr int64_t kDeltaLimit = 1u << 30; |
547 | 3.99M | for (std::pair<int64_t, int64_t>& control_point : control_points_) { |
548 | 3.99M | control_point.first = UnpackSigned( |
549 | 3.99M | decoder->ReadHybridUint(kControlPointsContext, br, context_map)); |
550 | 3.99M | control_point.second = UnpackSigned( |
551 | 3.99M | decoder->ReadHybridUint(kControlPointsContext, br, context_map)); |
552 | | // Check delta-deltas are not outrageous; it is not in spec, but there is |
553 | | // no reason to allow larger values. |
554 | 3.99M | if ((control_point.first >= kDeltaLimit) || |
555 | 3.99M | (control_point.first <= -kDeltaLimit) || |
556 | 3.99M | (control_point.second >= kDeltaLimit) || |
557 | 3.99M | (control_point.second <= -kDeltaLimit)) { |
558 | 7 | return JXL_FAILURE("Spline delta-delta is out of bounds"); |
559 | 7 | } |
560 | 3.99M | } |
561 | | |
562 | 55.0k | const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status { |
563 | 55.0k | constexpr int kWeirdNumber = std::numeric_limits<int>::min(); |
564 | 1.81M | for (int i = 0; i < 32; ++i) { |
565 | 1.76M | dct[i] = |
566 | 1.76M | UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map)); |
567 | 1.76M | if (dct[i] == kWeirdNumber) { |
568 | 0 | return JXL_FAILURE("The weird number in spline DCT"); |
569 | 0 | } |
570 | 1.76M | } |
571 | 55.0k | return true; |
572 | 55.0k | }; |
573 | 41.3k | for (auto& dct : color_dct_) { |
574 | 41.3k | JXL_RETURN_IF_ERROR(decode_dct(dct)); |
575 | 41.3k | } |
576 | 13.7k | JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_)); |
577 | 13.7k | return true; |
578 | 13.7k | } |
579 | | |
580 | 25.5k | void Splines::Clear() { |
581 | 25.5k | quantization_adjustment_ = 0; |
582 | 25.5k | splines_.clear(); |
583 | 25.5k | starting_points_.clear(); |
584 | 25.5k | segments_.clear(); |
585 | 25.5k | segment_indices_.clear(); |
586 | 25.5k | segment_y_start_.clear(); |
587 | 25.5k | } |
588 | | |
589 | | Status Splines::Decode(JxlMemoryManager* memory_manager, jxl::BitReader* br, |
590 | 3.45k | const size_t num_pixels) { |
591 | 3.45k | std::vector<uint8_t> context_map; |
592 | 3.45k | ANSCode code; |
593 | 3.45k | JXL_RETURN_IF_ERROR(DecodeHistograms(memory_manager, br, kNumSplineContexts, |
594 | 3.45k | &code, &context_map)); |
595 | 6.88k | JXL_ASSIGN_OR_RETURN(ANSSymbolReader decoder, |
596 | 6.88k | ANSSymbolReader::Create(&code, br)); |
597 | 6.88k | size_t num_splines = |
598 | 6.88k | decoder.ReadHybridUint(kNumSplinesContext, br, context_map); |
599 | 6.88k | size_t max_control_points = std::min( |
600 | 6.88k | kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio); |
601 | 6.88k | if (num_splines > max_control_points || |
602 | 3.44k | num_splines + 1 > max_control_points) { |
603 | 9 | return JXL_FAILURE("Too many splines: %" PRIuS, num_splines); |
604 | 9 | } |
605 | 3.43k | num_splines++; |
606 | 3.43k | JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder, |
607 | 3.43k | context_map, num_splines)); |
608 | | |
609 | 3.40k | quantization_adjustment_ = UnpackSigned( |
610 | 3.40k | decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map)); |
611 | | |
612 | 3.40k | splines_.clear(); |
613 | 3.40k | splines_.reserve(num_splines); |
614 | 3.40k | size_t num_control_points = num_splines; |
615 | 17.1k | for (size_t i = 0; i < num_splines; ++i) { |
616 | 13.7k | QuantizedSpline spline; |
617 | 13.7k | JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br, |
618 | 13.7k | max_control_points, &num_control_points)); |
619 | 13.7k | splines_.push_back(std::move(spline)); |
620 | 13.7k | } |
621 | | |
622 | 3.37k | JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState()); |
623 | | |
624 | 3.37k | if (!HasAny()) { |
625 | 0 | return JXL_FAILURE("Decoded splines but got none"); |
626 | 0 | } |
627 | | |
628 | 3.37k | return true; |
629 | 3.37k | } |
630 | | |
631 | 0 | void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect) const { |
632 | 0 | Apply</*add=*/true>(opsin, opsin_rect); |
633 | 0 | } |
634 | | void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
635 | | float* JXL_RESTRICT row_b, size_t y, size_t x0, |
636 | 82.9k | size_t x1) const { |
637 | 82.9k | ApplyToRow</*add=*/true>(row_x, row_y, row_b, y, x0, x1); |
638 | 82.9k | } |
639 | | |
640 | 186 | void Splines::SubtractFrom(Image3F* const opsin) const { |
641 | 186 | Apply</*add=*/false>(opsin, Rect(*opsin)); |
642 | 186 | } |
643 | | |
644 | | Status Splines::InitializeDrawCache(const size_t image_xsize, |
645 | | const size_t image_ysize, |
646 | 3.51k | const ColorCorrelation& color_correlation) { |
647 | | // TODO(veluca): avoid storing segments that are entirely outside image |
648 | | // boundaries. |
649 | 3.51k | segments_.clear(); |
650 | 3.51k | segment_indices_.clear(); |
651 | 3.51k | segment_y_start_.clear(); |
652 | 3.51k | std::vector<std::pair<size_t, size_t>> segments_by_y; |
653 | 3.51k | std::vector<Spline::Point> intermediate_points; |
654 | 3.51k | uint64_t total_estimated_area_reached = 0; |
655 | 3.51k | std::vector<Spline> splines; |
656 | 11.3k | for (size_t i = 0; i < splines_.size(); ++i) { |
657 | 7.83k | Spline spline; |
658 | 7.83k | JXL_RETURN_IF_ERROR(splines_[i].Dequantize( |
659 | 7.83k | starting_points_[i], quantization_adjustment_, |
660 | 7.83k | color_correlation.YtoXRatio(0), color_correlation.YtoBRatio(0), |
661 | 7.83k | image_xsize * image_ysize, &total_estimated_area_reached, spline)); |
662 | 7.80k | if (std::adjacent_find(spline.control_points.begin(), |
663 | 7.80k | spline.control_points.end()) != |
664 | 7.80k | spline.control_points.end()) { |
665 | | // Otherwise division by zero might occur. Once control points coincide, |
666 | | // the direction of curve is undefined... |
667 | 2 | return JXL_FAILURE( |
668 | 2 | "identical successive control points in spline %" PRIuS, i); |
669 | 2 | } |
670 | 7.80k | splines.push_back(spline); |
671 | 7.80k | } |
672 | | // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams. |
673 | 3.49k | if (total_estimated_area_reached > |
674 | 3.49k | std::min( |
675 | 3.49k | (8 * image_xsize * image_ysize + (static_cast<uint64_t>(1) << 25)), |
676 | 3.49k | (static_cast<uint64_t>(1) << 30))) { |
677 | 7 | JXL_WARNING( |
678 | 7 | "Large total_estimated_area_reached, expect slower decoding: %" PRIu64, |
679 | 7 | total_estimated_area_reached); |
680 | 7 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
681 | 7 | return JXL_FAILURE("Total spline area is too large"); |
682 | 7 | #endif |
683 | 7 | } |
684 | | |
685 | 7.61k | for (Spline& spline : splines) { |
686 | 7.61k | std::vector<std::pair<Spline::Point, float>> points_to_draw; |
687 | 12.0M | auto add_point = [&](const Spline::Point& point, const float multiplier) { |
688 | 12.0M | points_to_draw.emplace_back(point, multiplier); |
689 | 12.0M | }; |
690 | 7.61k | intermediate_points.clear(); |
691 | 7.61k | DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points); |
692 | 7.61k | JXL_RETURN_IF_ERROR( |
693 | 7.61k | ForEachEquallySpacedPoint(intermediate_points, add_point)); |
694 | 7.61k | const float arc_length = |
695 | 7.61k | (points_to_draw.size() - 2) * kDesiredRenderingDistance + |
696 | 7.61k | points_to_draw.back().second; |
697 | 7.61k | if (arc_length <= 0.f) { |
698 | | // This spline wouldn't have any effect. |
699 | 4.20k | continue; |
700 | 4.20k | } |
701 | 3.40k | HWY_DYNAMIC_DISPATCH(SegmentsFromPoints) |
702 | 3.40k | (spline, points_to_draw, arc_length, segments_, segments_by_y); |
703 | 3.40k | } |
704 | | |
705 | | // TODO(eustas): consider linear sorting here. |
706 | 3.48k | std::sort(segments_by_y.begin(), segments_by_y.end()); |
707 | 3.48k | segment_indices_.resize(segments_by_y.size()); |
708 | 3.48k | segment_y_start_.resize(image_ysize + 1); |
709 | 35.6M | for (size_t i = 0; i < segments_by_y.size(); i++) { |
710 | 35.6M | segment_indices_[i] = segments_by_y[i].second; |
711 | 35.6M | size_t y = segments_by_y[i].first; |
712 | 35.6M | if (y < image_ysize) { |
713 | 4.12M | segment_y_start_[y + 1]++; |
714 | 4.12M | } |
715 | 35.6M | } |
716 | 920k | for (size_t y = 0; y < image_ysize; y++) { |
717 | 916k | segment_y_start_[y + 1] += segment_y_start_[y]; |
718 | 916k | } |
719 | 3.48k | return true; |
720 | 3.48k | } |
721 | | |
722 | | template <bool add> |
723 | | void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
724 | | float* JXL_RESTRICT row_b, size_t y, size_t x0, |
725 | 82.9k | size_t x1) const { |
726 | 82.9k | if (segments_.empty()) return; |
727 | 27.5k | HWY_DYNAMIC_DISPATCH(DrawSegments) |
728 | 27.5k | (row_x, row_y, row_b, y, x0, x1, add, segments_.data(), |
729 | 27.5k | segment_indices_.data(), segment_y_start_.data()); |
730 | 27.5k | } void jxl::Splines::ApplyToRow<true>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const Line | Count | Source | 725 | 82.9k | size_t x1) const { | 726 | 82.9k | if (segments_.empty()) return; | 727 | 27.5k | HWY_DYNAMIC_DISPATCH(DrawSegments) | 728 | 27.5k | (row_x, row_y, row_b, y, x0, x1, add, segments_.data(), | 729 | 27.5k | segment_indices_.data(), segment_y_start_.data()); | 730 | 27.5k | } |
Unexecuted instantiation: void jxl::Splines::ApplyToRow<false>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const |
731 | | |
732 | | template <bool add> |
733 | 186 | void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect) const { |
734 | 186 | if (segments_.empty()) return; |
735 | 0 | const size_t y0 = opsin_rect.y0(); |
736 | 0 | const size_t x0 = opsin_rect.x0(); |
737 | 0 | const size_t x1 = opsin_rect.x1(); |
738 | 0 | for (size_t y = 0; y < opsin_rect.ysize(); y++) { |
739 | 0 | ApplyToRow<add>(opsin->PlaneRow(0, y0 + y) + x0, |
740 | 0 | opsin->PlaneRow(1, y0 + y) + x0, |
741 | 0 | opsin->PlaneRow(2, y0 + y) + x0, y0 + y, x0, x1); |
742 | 0 | } |
743 | 0 | } Unexecuted instantiation: void jxl::Splines::Apply<true>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const void jxl::Splines::Apply<false>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const Line | Count | Source | 733 | 186 | void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect) const { | 734 | 186 | if (segments_.empty()) return; | 735 | 0 | const size_t y0 = opsin_rect.y0(); | 736 | 0 | const size_t x0 = opsin_rect.x0(); | 737 | 0 | const size_t x1 = opsin_rect.x1(); | 738 | 0 | for (size_t y = 0; y < opsin_rect.ysize(); y++) { | 739 | 0 | ApplyToRow<add>(opsin->PlaneRow(0, y0 + y) + x0, | 740 | 0 | opsin->PlaneRow(1, y0 + y) + x0, | 741 | 0 | opsin->PlaneRow(2, y0 + y) + x0, y0 + y, x0, x1); | 742 | 0 | } | 743 | 0 | } |
|
744 | | |
745 | | } // namespace jxl |
746 | | #endif // HWY_ONCE |