/src/libjxl/lib/jxl/splines.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/splines.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | |
10 | | #include <algorithm> |
11 | | #include <cinttypes> // PRIu64 |
12 | | #include <cmath> |
13 | | #include <cstddef> |
14 | | #include <cstdint> |
15 | | #include <cstring> |
16 | | #include <limits> |
17 | | #include <utility> |
18 | | #include <vector> |
19 | | |
20 | | #include "lib/jxl/base/bits.h" |
21 | | #include "lib/jxl/base/common.h" |
22 | | #include "lib/jxl/base/compiler_specific.h" |
23 | | #include "lib/jxl/base/printf_macros.h" |
24 | | #include "lib/jxl/base/rect.h" |
25 | | #include "lib/jxl/base/status.h" |
26 | | #include "lib/jxl/chroma_from_luma.h" |
27 | | #include "lib/jxl/common.h" // JXL_HIGH_PRECISION |
28 | | #include "lib/jxl/dct_scales.h" |
29 | | #include "lib/jxl/dec_ans.h" |
30 | | #include "lib/jxl/dec_bit_reader.h" |
31 | | #include "lib/jxl/image.h" |
32 | | #include "lib/jxl/memory_manager_internal.h" |
33 | | #include "lib/jxl/pack_signed.h" |
34 | | |
35 | | #undef HWY_TARGET_INCLUDE |
36 | | #define HWY_TARGET_INCLUDE "lib/jxl/splines.cc" |
37 | | #include <hwy/foreach_target.h> |
38 | | #include <hwy/highway.h> |
39 | | |
40 | | #include "lib/jxl/base/fast_math-inl.h" |
41 | | HWY_BEFORE_NAMESPACE(); |
42 | | namespace jxl { |
43 | | namespace HWY_NAMESPACE { |
44 | | namespace { |
45 | | |
46 | | // These templates are not found via ADL. |
47 | | using hwy::HWY_NAMESPACE::Mul; |
48 | | using hwy::HWY_NAMESPACE::MulAdd; |
49 | | using hwy::HWY_NAMESPACE::MulSub; |
50 | | using hwy::HWY_NAMESPACE::Sqrt; |
51 | | using hwy::HWY_NAMESPACE::Sub; |
52 | | |
53 | | // Given a set of DCT coefficients, this returns the result of performing cosine |
54 | | // interpolation on the original samples. |
55 | 77.7M | float ContinuousIDCT(const Dct32& dct, const float t) { |
56 | | // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of |
57 | | // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces |
58 | | // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform |
59 | | // treatment of all the coefficients. |
60 | 77.7M | constexpr float kMultipliers[32] = { |
61 | 77.7M | kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, |
62 | 77.7M | kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, |
63 | 77.7M | kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, |
64 | 77.7M | kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, |
65 | 77.7M | kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, |
66 | 77.7M | kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, |
67 | 77.7M | kPi / 32 * 30, kPi / 32 * 31, |
68 | 77.7M | }; |
69 | 77.7M | HWY_CAPPED(float, 32) df; |
70 | 77.7M | auto result = Zero(df); |
71 | 77.7M | const auto tandhalf = Set(df, t + 0.5f); |
72 | 388M | for (int i = 0; i < 32; i += Lanes(df)) { |
73 | 310M | auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); |
74 | 310M | auto cos = FastCosf(df, cos_arg); |
75 | 310M | auto local_res = Mul(LoadU(df, dct.data() + i), cos); |
76 | 310M | result = MulAdd(Set(df, kSqrt2), local_res, result); |
77 | 310M | } |
78 | 77.7M | return GetLane(SumOfLanes(df, result)); |
79 | 77.7M | } splines.cc:jxl::N_AVX2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float) Line | Count | Source | 55 | 77.7M | float ContinuousIDCT(const Dct32& dct, const float t) { | 56 | | // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of | 57 | | // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces | 58 | | // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform | 59 | | // treatment of all the coefficients. | 60 | 77.7M | constexpr float kMultipliers[32] = { | 61 | 77.7M | kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, | 62 | 77.7M | kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, | 63 | 77.7M | kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, | 64 | 77.7M | kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, | 65 | 77.7M | kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, | 66 | 77.7M | kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, | 67 | 77.7M | kPi / 32 * 30, kPi / 32 * 31, | 68 | 77.7M | }; | 69 | 77.7M | HWY_CAPPED(float, 32) df; | 70 | 77.7M | auto result = Zero(df); | 71 | 77.7M | const auto tandhalf = Set(df, t + 0.5f); | 72 | 388M | for (int i = 0; i < 32; i += Lanes(df)) { | 73 | 310M | auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); | 74 | 310M | auto cos = FastCosf(df, cos_arg); | 75 | 310M | auto local_res = Mul(LoadU(df, dct.data() + i), cos); | 76 | 310M | result = MulAdd(Set(df, kSqrt2), local_res, result); | 77 | 310M | } | 78 | 77.7M | return GetLane(SumOfLanes(df, result)); | 79 | 77.7M | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::ContinuousIDCT(std::__1::array<float, 32ul> const&, float) |
80 | | |
81 | | template <typename DF> |
82 | | void DrawSegment(DF df, const SplineSegment& segment, const bool add, |
83 | | const size_t y, const size_t x, const size_t x0, |
84 | 2.51M | float* JXL_RESTRICT rows[3]) { |
85 | 2.51M | Rebind<int32_t, DF> di; |
86 | 2.51M | const auto inv_sigma = Set(df, segment.inv_sigma); |
87 | 2.51M | const auto half = Set(df, 0.5f); |
88 | 2.51M | const auto one_over_2s2 = Set(df, 0.353553391f); |
89 | 2.51M | const auto sigma_over_4_times_intensity = |
90 | 2.51M | Set(df, segment.sigma_over_4_times_intensity); |
91 | 2.51M | const auto dx = |
92 | 2.51M | Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x)); |
93 | 2.51M | const auto dy = Set(df, y - segment.center_y); |
94 | 2.51M | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); |
95 | 2.51M | const auto distance = Sqrt(sqd); |
96 | 2.51M | const auto one_dimensional_factor = |
97 | 2.51M | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), |
98 | 2.51M | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); |
99 | 2.51M | auto local_intensity = |
100 | 2.51M | Mul(sigma_over_4_times_intensity, |
101 | 2.51M | Mul(one_dimensional_factor, one_dimensional_factor)); |
102 | 10.0M | for (size_t c = 0; c < 3; ++c) { |
103 | 7.54M | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); |
104 | 7.54M | const auto in = LoadU(df, rows[c] + x); |
105 | 7.54M | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); |
106 | 7.54M | } |
107 | 2.51M | } splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 8ul, 0> >(hwy::N_AVX2::Simd<float, 8ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 84 | 1.20M | float* JXL_RESTRICT rows[3]) { | 85 | 1.20M | Rebind<int32_t, DF> di; | 86 | 1.20M | const auto inv_sigma = Set(df, segment.inv_sigma); | 87 | 1.20M | const auto half = Set(df, 0.5f); | 88 | 1.20M | const auto one_over_2s2 = Set(df, 0.353553391f); | 89 | 1.20M | const auto sigma_over_4_times_intensity = | 90 | 1.20M | Set(df, segment.sigma_over_4_times_intensity); | 91 | 1.20M | const auto dx = | 92 | 1.20M | Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x)); | 93 | 1.20M | const auto dy = Set(df, y - segment.center_y); | 94 | 1.20M | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 95 | 1.20M | const auto distance = Sqrt(sqd); | 96 | 1.20M | const auto one_dimensional_factor = | 97 | 1.20M | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 98 | 1.20M | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 99 | 1.20M | auto local_intensity = | 100 | 1.20M | Mul(sigma_over_4_times_intensity, | 101 | 1.20M | Mul(one_dimensional_factor, one_dimensional_factor)); | 102 | 4.81M | for (size_t c = 0; c < 3; ++c) { | 103 | 3.61M | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 104 | 3.61M | const auto in = LoadU(df, rows[c] + x); | 105 | 3.61M | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 106 | 3.61M | } | 107 | 1.20M | } |
splines.cc:void jxl::N_AVX2::(anonymous namespace)::DrawSegment<hwy::N_AVX2::Simd<float, 1ul, 0> >(hwy::N_AVX2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 84 | 1.30M | float* JXL_RESTRICT rows[3]) { | 85 | 1.30M | Rebind<int32_t, DF> di; | 86 | 1.30M | const auto inv_sigma = Set(df, segment.inv_sigma); | 87 | 1.30M | const auto half = Set(df, 0.5f); | 88 | 1.30M | const auto one_over_2s2 = Set(df, 0.353553391f); | 89 | 1.30M | const auto sigma_over_4_times_intensity = | 90 | 1.30M | Set(df, segment.sigma_over_4_times_intensity); | 91 | 1.30M | const auto dx = | 92 | 1.30M | Sub(ConvertTo(df, Iota(di, x + x0)), Set(df, segment.center_x)); | 93 | 1.30M | const auto dy = Set(df, y - segment.center_y); | 94 | 1.30M | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); | 95 | 1.30M | const auto distance = Sqrt(sqd); | 96 | 1.30M | const auto one_dimensional_factor = | 97 | 1.30M | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), | 98 | 1.30M | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); | 99 | 1.30M | auto local_intensity = | 100 | 1.30M | Mul(sigma_over_4_times_intensity, | 101 | 1.30M | Mul(one_dimensional_factor, one_dimensional_factor)); | 102 | 5.23M | for (size_t c = 0; c < 3; ++c) { | 103 | 3.92M | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); | 104 | 3.92M | const auto in = LoadU(df, rows[c] + x); | 105 | 3.92M | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); | 106 | 3.92M | } | 107 | 1.30M | } |
Unexecuted instantiation: splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 4ul, 0> >(hwy::N_SSE4::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Unexecuted instantiation: splines.cc:void jxl::N_SSE4::(anonymous namespace)::DrawSegment<hwy::N_SSE4::Simd<float, 1ul, 0> >(hwy::N_SSE4::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Unexecuted instantiation: splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 4ul, 0> >(hwy::N_SSE2::Simd<float, 4ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Unexecuted instantiation: splines.cc:void jxl::N_SSE2::(anonymous namespace)::DrawSegment<hwy::N_SSE2::Simd<float, 1ul, 0> >(hwy::N_SSE2::Simd<float, 1ul, 0>, jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) |
108 | | |
109 | | void DrawSegment(const SplineSegment& segment, const bool add, const size_t y, |
110 | | const size_t x0, const size_t x1, |
111 | 479k | float* JXL_RESTRICT rows[3]) { |
112 | 479k | ptrdiff_t start = std::llround(segment.center_x - segment.maximum_distance); |
113 | 479k | ptrdiff_t end = std::llround(segment.center_x + segment.maximum_distance); |
114 | 479k | if (end < static_cast<ptrdiff_t>(x0) || start >= static_cast<ptrdiff_t>(x1)) { |
115 | 105k | return; // span does not intersect scan |
116 | 105k | } |
117 | 374k | size_t span_x0 = std::max<ptrdiff_t>(x0, start) - x0; |
118 | 374k | size_t span_x1 = std::min<ptrdiff_t>(x1, end + 1) - x0; // exclusive |
119 | 374k | HWY_FULL(float) df; |
120 | 374k | size_t x = span_x0; |
121 | 1.57M | for (; x + Lanes(df) <= span_x1; x += Lanes(df)) { |
122 | 1.20M | DrawSegment(df, segment, add, y, x, x0, rows); |
123 | 1.20M | } |
124 | 1.68M | for (; x < span_x1; ++x) { |
125 | 1.30M | DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows); |
126 | 1.30M | } |
127 | 374k | } splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Line | Count | Source | 111 | 479k | float* JXL_RESTRICT rows[3]) { | 112 | 479k | ptrdiff_t start = std::llround(segment.center_x - segment.maximum_distance); | 113 | 479k | ptrdiff_t end = std::llround(segment.center_x + segment.maximum_distance); | 114 | 479k | if (end < static_cast<ptrdiff_t>(x0) || start >= static_cast<ptrdiff_t>(x1)) { | 115 | 105k | return; // span does not intersect scan | 116 | 105k | } | 117 | 374k | size_t span_x0 = std::max<ptrdiff_t>(x0, start) - x0; | 118 | 374k | size_t span_x1 = std::min<ptrdiff_t>(x1, end + 1) - x0; // exclusive | 119 | 374k | HWY_FULL(float) df; | 120 | 374k | size_t x = span_x0; | 121 | 1.57M | for (; x + Lanes(df) <= span_x1; x += Lanes(df)) { | 122 | 1.20M | DrawSegment(df, segment, add, y, x, x0, rows); | 123 | 1.20M | } | 124 | 1.68M | for (; x < span_x1; ++x) { | 125 | 1.30M | DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, x0, rows); | 126 | 1.30M | } | 127 | 374k | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegment(jxl::SplineSegment const&, bool, unsigned long, unsigned long, unsigned long, float* restrict*) |
128 | | |
129 | | void ComputeSegments(size_t image_ysize, const Spline::Point& center, |
130 | | const float intensity, const float color[3], |
131 | | const float sigma, std::vector<SplineSegment>& segments, |
132 | 19.4M | std::vector<SplineSegmentSpan>& segment_spans) { |
133 | | // Sanity check sigma, inverse sigma and intensity |
134 | 19.4M | if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && |
135 | 19.3M | std::isfinite(intensity))) { |
136 | 31.0k | return; |
137 | 31.0k | } |
138 | 19.3M | #if JXL_HIGH_PRECISION |
139 | 19.3M | constexpr float kDistanceExp = 5; |
140 | | #else |
141 | | // About 30% faster. |
142 | | constexpr float kDistanceExp = 3; |
143 | | #endif |
144 | | // We cap from below colors to at least 0.01. |
145 | 19.3M | float max_color = 0.01f; |
146 | 77.5M | for (size_t c = 0; c < 3; c++) { |
147 | 58.1M | max_color = std::max(max_color, std::abs(color[c] * intensity)); |
148 | 58.1M | } |
149 | | // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops |
150 | | // below 10^-kDistanceExp. |
151 | 19.3M | const float maximum_distance = |
152 | 19.3M | std::sqrt(-2.0f * sigma * sigma * |
153 | 19.3M | (std::log(0.1f) * kDistanceExp - std::log(max_color))); |
154 | | |
155 | 19.3M | ptrdiff_t y0 = std::llround(center.y - maximum_distance); |
156 | 19.3M | y0 = std::max<ptrdiff_t>(y0, 0); |
157 | 19.3M | ptrdiff_t y1 = std::llround(center.y + maximum_distance) + 1; |
158 | 19.3M | y1 = std::min<ptrdiff_t>(y1, image_ysize); |
159 | 19.3M | if (y1 <= y0) return; |
160 | | |
161 | 1.11M | SplineSegment segment; |
162 | 1.11M | segment.center_y = center.y; |
163 | 1.11M | segment.center_x = center.x; |
164 | 1.11M | memcpy(segment.color, color, sizeof(segment.color)); |
165 | 1.11M | segment.inv_sigma = 1.0f / sigma; |
166 | 1.11M | segment.sigma_over_4_times_intensity = .25f * sigma * intensity; |
167 | 1.11M | segment.maximum_distance = maximum_distance; |
168 | 1.11M | segments.emplace_back(segment); |
169 | | |
170 | 1.11M | segment_spans.emplace_back(y0, y1); |
171 | 1.11M | } splines.cc:jxl::N_AVX2::(anonymous namespace)::ComputeSegments(unsigned long, jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&) Line | Count | Source | 132 | 19.4M | std::vector<SplineSegmentSpan>& segment_spans) { | 133 | | // Sanity check sigma, inverse sigma and intensity | 134 | 19.4M | if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && | 135 | 19.3M | std::isfinite(intensity))) { | 136 | 31.0k | return; | 137 | 31.0k | } | 138 | 19.3M | #if JXL_HIGH_PRECISION | 139 | 19.3M | constexpr float kDistanceExp = 5; | 140 | | #else | 141 | | // About 30% faster. | 142 | | constexpr float kDistanceExp = 3; | 143 | | #endif | 144 | | // We cap from below colors to at least 0.01. | 145 | 19.3M | float max_color = 0.01f; | 146 | 77.5M | for (size_t c = 0; c < 3; c++) { | 147 | 58.1M | max_color = std::max(max_color, std::abs(color[c] * intensity)); | 148 | 58.1M | } | 149 | | // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops | 150 | | // below 10^-kDistanceExp. | 151 | 19.3M | const float maximum_distance = | 152 | 19.3M | std::sqrt(-2.0f * sigma * sigma * | 153 | 19.3M | (std::log(0.1f) * kDistanceExp - std::log(max_color))); | 154 | | | 155 | 19.3M | ptrdiff_t y0 = std::llround(center.y - maximum_distance); | 156 | 19.3M | y0 = std::max<ptrdiff_t>(y0, 0); | 157 | 19.3M | ptrdiff_t y1 = std::llround(center.y + maximum_distance) + 1; | 158 | 19.3M | y1 = std::min<ptrdiff_t>(y1, image_ysize); | 159 | 19.3M | if (y1 <= y0) return; | 160 | | | 161 | 1.11M | SplineSegment segment; | 162 | 1.11M | segment.center_y = center.y; | 163 | 1.11M | segment.center_x = center.x; | 164 | 1.11M | memcpy(segment.color, color, sizeof(segment.color)); | 165 | 1.11M | segment.inv_sigma = 1.0f / sigma; | 166 | 1.11M | segment.sigma_over_4_times_intensity = .25f * sigma * intensity; | 167 | 1.11M | segment.maximum_distance = maximum_distance; | 168 | 1.11M | segments.emplace_back(segment); | 169 | | | 170 | 1.11M | segment_spans.emplace_back(y0, y1); | 171 | 1.11M | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::ComputeSegments(unsigned long, jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::ComputeSegments(unsigned long, jxl::Spline::Point const&, float, float const*, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&) |
172 | | |
173 | | void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
174 | | float* JXL_RESTRICT row_b, size_t y, size_t x0, size_t x1, |
175 | | const bool add, const SplineSegment* segments, |
176 | | const size_t* segment_indices, |
177 | 26.8k | const ptrdiff_t* segment_y_start) { |
178 | 26.8k | float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b}; |
179 | 506k | for (ptrdiff_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { |
180 | 479k | DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows); |
181 | 479k | } |
182 | 26.8k | } splines.cc:jxl::N_AVX2::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, long const*) Line | Count | Source | 177 | 26.8k | const ptrdiff_t* segment_y_start) { | 178 | 26.8k | float* JXL_RESTRICT rows[3] = {row_x, row_y, row_b}; | 179 | 506k | for (ptrdiff_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { | 180 | 479k | DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows); | 181 | 479k | } | 182 | 26.8k | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, long const*) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::DrawSegments(float*, float*, float*, unsigned long, unsigned long, unsigned long, bool, jxl::SplineSegment const*, unsigned long const*, long const*) |
183 | | |
184 | | void SegmentsFromPoints( |
185 | | size_t image_ysize, const Spline& spline, |
186 | | const std::vector<std::pair<Spline::Point, float>>& points_to_draw, |
187 | | const float arc_length, std::vector<SplineSegment>& segments, |
188 | 4.85k | std::vector<SplineSegmentSpan>& segments_spans) { |
189 | 4.85k | const float inv_arc_length = 1.0f / arc_length; |
190 | 4.85k | int k = 0; |
191 | 19.4M | for (const auto& point_to_draw : points_to_draw) { |
192 | 19.4M | const Spline::Point& point = point_to_draw.first; |
193 | 19.4M | const float multiplier = point_to_draw.second; |
194 | 19.4M | const float progress_along_arc = |
195 | 19.4M | std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); |
196 | 19.4M | ++k; |
197 | 19.4M | float color[3]; |
198 | 77.7M | for (size_t c = 0; c < 3; ++c) { |
199 | 58.2M | color[c] = |
200 | 58.2M | ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); |
201 | 58.2M | } |
202 | 19.4M | const float sigma = |
203 | 19.4M | ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); |
204 | 19.4M | ComputeSegments(image_ysize, point, multiplier, color, sigma, segments, |
205 | 19.4M | segments_spans); |
206 | 19.4M | } |
207 | 4.85k | } splines.cc:jxl::N_AVX2::(anonymous namespace)::SegmentsFromPoints(unsigned long, jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&) Line | Count | Source | 188 | 4.85k | std::vector<SplineSegmentSpan>& segments_spans) { | 189 | 4.85k | const float inv_arc_length = 1.0f / arc_length; | 190 | 4.85k | int k = 0; | 191 | 19.4M | for (const auto& point_to_draw : points_to_draw) { | 192 | 19.4M | const Spline::Point& point = point_to_draw.first; | 193 | 19.4M | const float multiplier = point_to_draw.second; | 194 | 19.4M | const float progress_along_arc = | 195 | 19.4M | std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); | 196 | 19.4M | ++k; | 197 | 19.4M | float color[3]; | 198 | 77.7M | for (size_t c = 0; c < 3; ++c) { | 199 | 58.2M | color[c] = | 200 | 58.2M | ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); | 201 | 58.2M | } | 202 | 19.4M | const float sigma = | 203 | 19.4M | ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); | 204 | 19.4M | ComputeSegments(image_ysize, point, multiplier, color, sigma, segments, | 205 | 19.4M | segments_spans); | 206 | 19.4M | } | 207 | 4.85k | } |
Unexecuted instantiation: splines.cc:jxl::N_SSE4::(anonymous namespace)::SegmentsFromPoints(unsigned long, jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&) Unexecuted instantiation: splines.cc:jxl::N_SSE2::(anonymous namespace)::SegmentsFromPoints(unsigned long, jxl::Spline const&, std::__1::vector<std::__1::pair<jxl::Spline::Point, float>, std::__1::allocator<std::__1::pair<jxl::Spline::Point, float> > > const&, float, std::__1::vector<jxl::SplineSegment, std::__1::allocator<jxl::SplineSegment> >&, std::__1::vector<jxl::SplineSegmentSpan, std::__1::allocator<jxl::SplineSegmentSpan> >&) |
208 | | } // namespace |
209 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
210 | | } // namespace HWY_NAMESPACE |
211 | | } // namespace jxl |
212 | | HWY_AFTER_NAMESPACE(); |
213 | | |
214 | | #if HWY_ONCE |
215 | | namespace jxl { |
216 | | HWY_EXPORT(SegmentsFromPoints); |
217 | | HWY_EXPORT(DrawSegments); |
218 | | |
219 | | namespace { |
220 | | |
221 | | // It is not in spec, but reasonable limit to avoid overflows. |
222 | | template <typename T> |
223 | 255k | Status ValidateSplinePointPos(const T& x, const T& y) { |
224 | 255k | constexpr T kSplinePosLimit = 1u << 23; |
225 | 255k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || |
226 | 255k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { |
227 | 75 | return JXL_FAILURE("Spline coordinates out of bounds"); |
228 | 75 | } |
229 | 255k | return true; |
230 | 255k | } splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<long>(long const&, long const&) Line | Count | Source | 223 | 86.5k | Status ValidateSplinePointPos(const T& x, const T& y) { | 224 | 86.5k | constexpr T kSplinePosLimit = 1u << 23; | 225 | 86.5k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 226 | 86.5k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 227 | 52 | return JXL_FAILURE("Spline coordinates out of bounds"); | 228 | 52 | } | 229 | 86.5k | return true; | 230 | 86.5k | } |
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<float>(float const&, float const&) Line | Count | Source | 223 | 10.1k | Status ValidateSplinePointPos(const T& x, const T& y) { | 224 | 10.1k | constexpr T kSplinePosLimit = 1u << 23; | 225 | 10.1k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 226 | 10.1k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 227 | 0 | return JXL_FAILURE("Spline coordinates out of bounds"); | 228 | 0 | } | 229 | 10.1k | return true; | 230 | 10.1k | } |
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<int>(int const&, int const&) Line | Count | Source | 223 | 158k | Status ValidateSplinePointPos(const T& x, const T& y) { | 224 | 158k | constexpr T kSplinePosLimit = 1u << 23; | 225 | 158k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 226 | 158k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 227 | 23 | return JXL_FAILURE("Spline coordinates out of bounds"); | 228 | 23 | } | 229 | 158k | return true; | 230 | 158k | } |
|
231 | | |
232 | | // Maximum number of spline control points per frame is |
233 | | // std::min(kMaxNumControlPoints, xsize * ysize / 2) |
234 | | constexpr size_t kMaxNumControlPoints = 1u << 20u; |
235 | | constexpr size_t kMaxNumControlPointsPerPixelRatio = 2; |
236 | | |
237 | 0 | float AdjustedQuant(const int32_t adjustment) { |
238 | 0 | return (adjustment >= 0) ? (1.f + .125f * adjustment) |
239 | 0 | : 1.f / (1.f - .125f * adjustment); |
240 | 0 | } |
241 | | |
242 | 10.1k | float InvAdjustedQuant(const int32_t adjustment) { |
243 | 10.1k | return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment) |
244 | 10.1k | : (1.f - .125f * adjustment); |
245 | 10.1k | } |
246 | | |
247 | | // X, Y, B, sigma. |
248 | | constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f}; |
249 | | |
250 | | Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points, |
251 | | BitReader* const br, ANSSymbolReader* reader, |
252 | | const std::vector<uint8_t>& context_map, |
253 | 4.50k | const size_t num_splines) { |
254 | 4.50k | points->clear(); |
255 | 4.50k | points->reserve(num_splines); |
256 | 4.50k | int64_t last_x = 0; |
257 | 4.50k | int64_t last_y = 0; |
258 | 91.0k | for (size_t i = 0; i < num_splines; i++) { |
259 | 86.5k | size_t dx = |
260 | 86.5k | reader->ReadHybridUint(kStartingPositionContext, br, context_map); |
261 | 86.5k | size_t dy = |
262 | 86.5k | reader->ReadHybridUint(kStartingPositionContext, br, context_map); |
263 | 86.5k | int64_t x; |
264 | 86.5k | int64_t y; |
265 | 86.5k | if (i != 0) { |
266 | 82.0k | x = UnpackSigned(dx) + last_x; |
267 | 82.0k | y = UnpackSigned(dy) + last_y; |
268 | 82.0k | } else { |
269 | 4.50k | x = dx; |
270 | 4.50k | y = dy; |
271 | 4.50k | } |
272 | 86.5k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y)); |
273 | 86.5k | points->emplace_back(static_cast<float>(x), static_cast<float>(y)); |
274 | 86.5k | last_x = x; |
275 | 86.5k | last_y = y; |
276 | 86.5k | } |
277 | 4.45k | return true; |
278 | 4.50k | } |
279 | | |
280 | | struct Vector { |
281 | | float x, y; |
282 | 0 | Vector operator-() const { return {-x, -y}; } |
283 | 0 | Vector operator+(const Vector& other) const { |
284 | 0 | return {x + other.x, y + other.y}; |
285 | 0 | } |
286 | 20.5M | float SquaredNorm() const { return x * x + y * y; } |
287 | | }; |
288 | 25.9M | Vector operator*(const float k, const Vector& vec) { |
289 | 25.9M | return {k * vec.x, k * vec.y}; |
290 | 25.9M | } |
291 | | |
292 | 26.0M | Spline::Point operator+(const Spline::Point& p, const Vector& vec) { |
293 | 26.0M | return {p.x + vec.x, p.y + vec.y}; |
294 | 26.0M | } |
295 | 46.6M | Vector operator-(const Spline::Point& a, const Spline::Point& b) { |
296 | 46.6M | return {a.x - b.x, a.y - b.y}; |
297 | 46.6M | } |
298 | | |
299 | | // TODO(eustas): avoid making a copy of "points". |
300 | | void DrawCentripetalCatmullRomSpline(std::vector<Spline::Point> points, |
301 | 10.0k | std::vector<Spline::Point>& result) { |
302 | 10.0k | if (points.empty()) return; |
303 | 10.0k | if (points.size() == 1) { |
304 | 5.16k | result.push_back(points[0]); |
305 | 5.16k | return; |
306 | 5.16k | } |
307 | | // Number of points to compute between each control point. |
308 | 4.85k | static constexpr int kNumPoints = 16; |
309 | 4.85k | result.reserve((points.size() - 1) * kNumPoints + 1); |
310 | 4.85k | points.insert(points.begin(), points[0] + (points[0] - points[1])); |
311 | 4.85k | points.push_back(points[points.size() - 1] + |
312 | 4.85k | (points[points.size() - 1] - points[points.size() - 2])); |
313 | | // points has at least 4 elements at this point. |
314 | 77.9k | for (size_t start = 0; start < points.size() - 3; ++start) { |
315 | | // 4 of them are used, and we draw from p[1] to p[2]. |
316 | 73.0k | const Spline::Point* const p = &points[start]; |
317 | 73.0k | result.push_back(p[1]); |
318 | 73.0k | float d[3]; |
319 | 73.0k | float t[4]; |
320 | 73.0k | t[0] = 0; |
321 | 292k | for (int k = 0; k < 3; ++k) { |
322 | | // TODO(eustas): for each segment delta is calculated 3 times... |
323 | | // TODO(eustas): restrict d[k] with reasonable limit and spec it. |
324 | 219k | d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y)); |
325 | 219k | t[k + 1] = t[k] + d[k]; |
326 | 219k | } |
327 | 1.16M | for (int i = 1; i < kNumPoints; ++i) { |
328 | 1.09M | const float tt = d[0] + (static_cast<float>(i) / kNumPoints) * d[1]; |
329 | 1.09M | Spline::Point a[3]; |
330 | 4.38M | for (int k = 0; k < 3; ++k) { |
331 | | // TODO(eustas): reciprocal multiplication would be faster. |
332 | 3.28M | a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]); |
333 | 3.28M | } |
334 | 1.09M | Spline::Point b[2]; |
335 | 3.28M | for (int k = 0; k < 2; ++k) { |
336 | 2.19M | b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]); |
337 | 2.19M | } |
338 | 1.09M | result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0])); |
339 | 1.09M | } |
340 | 73.0k | } |
341 | 4.85k | result.push_back(points[points.size() - 2]); |
342 | 4.85k | } |
343 | | |
344 | | // Move along the line segments defined by `points`, `kDesiredRenderingDistance` |
345 | | // pixels at a time, and call `functor` with each point and the actual distance |
346 | | // to the previous point (which will always be kDesiredRenderingDistance except |
347 | | // possibly for the very last point). |
348 | | // TODO(eustas): this method always adds the last point, but never the first |
349 | | // (unless those are one); I believe both ends matter. |
350 | | template <typename Points, typename Functor> |
351 | 10.0k | Status ForEachEquallySpacedPoint(const Points& points, const Functor& functor) { |
352 | 10.0k | JXL_ENSURE(!points.empty()); |
353 | 10.0k | Spline::Point current = points.front(); |
354 | 10.0k | functor(current, kDesiredRenderingDistance); |
355 | 10.0k | auto next = points.begin(); |
356 | 19.4M | while (next != points.end()) { |
357 | 19.4M | const Spline::Point* previous = ¤t; |
358 | 19.4M | float arclength_from_previous = 0.f; |
359 | 20.6M | for (;;) { |
360 | 20.6M | if (next == points.end()) { |
361 | 10.0k | functor(*previous, arclength_from_previous); |
362 | 10.0k | return true; |
363 | 10.0k | } |
364 | 20.5M | const float arclength_to_next = |
365 | 20.5M | std::sqrt((*next - *previous).SquaredNorm()); |
366 | 20.5M | if (arclength_from_previous + arclength_to_next >= |
367 | 20.5M | kDesiredRenderingDistance) { |
368 | 19.4M | current = |
369 | 19.4M | *previous + ((kDesiredRenderingDistance - arclength_from_previous) / |
370 | 19.4M | arclength_to_next) * |
371 | 19.4M | (*next - *previous); |
372 | 19.4M | functor(current, kDesiredRenderingDistance); |
373 | 19.4M | break; |
374 | 19.4M | } |
375 | 1.17M | arclength_from_previous += arclength_to_next; |
376 | 1.17M | previous = &*next; |
377 | 1.17M | ++next; |
378 | 1.17M | } |
379 | 19.4M | } |
380 | 0 | return true; |
381 | 10.0k | } |
382 | | |
383 | | } // namespace |
384 | | |
385 | | StatusOr<QuantizedSpline> QuantizedSpline::Create( |
386 | | const Spline& original, const int32_t quantization_adjustment, |
387 | 0 | const float y_to_x, const float y_to_b) { |
388 | 0 | JXL_ENSURE(!original.control_points.empty()); |
389 | 0 | QuantizedSpline result; |
390 | 0 | result.control_points_.reserve(original.control_points.size() - 1); |
391 | 0 | const Spline::Point& starting_point = original.control_points.front(); |
392 | 0 | int previous_x = static_cast<int>(std::round(starting_point.x)); |
393 | 0 | int previous_y = static_cast<int>(std::round(starting_point.y)); |
394 | 0 | int previous_delta_x = 0; |
395 | 0 | int previous_delta_y = 0; |
396 | 0 | for (auto it = original.control_points.begin() + 1; |
397 | 0 | it != original.control_points.end(); ++it) { |
398 | 0 | const int new_x = static_cast<int>(std::round(it->x)); |
399 | 0 | const int new_y = static_cast<int>(std::round(it->y)); |
400 | 0 | const int new_delta_x = new_x - previous_x; |
401 | 0 | const int new_delta_y = new_y - previous_y; |
402 | 0 | result.control_points_.emplace_back(new_delta_x - previous_delta_x, |
403 | 0 | new_delta_y - previous_delta_y); |
404 | 0 | previous_delta_x = new_delta_x; |
405 | 0 | previous_delta_y = new_delta_y; |
406 | 0 | previous_x = new_x; |
407 | 0 | previous_y = new_y; |
408 | 0 | } |
409 | |
|
410 | 0 | const auto to_int = [](float v) -> int { |
411 | | // Maximal int representable with float. |
412 | 0 | constexpr float kMax = std::numeric_limits<int>::max() - 127; |
413 | 0 | constexpr float kMin = -kMax; |
414 | 0 | return static_cast<int>(std::round(Clamp1(v, kMin, kMax))); |
415 | 0 | }; |
416 | |
|
417 | 0 | const auto quant = AdjustedQuant(quantization_adjustment); |
418 | 0 | const auto inv_quant = InvAdjustedQuant(quantization_adjustment); |
419 | 0 | for (int c : {1, 0, 2}) { |
420 | 0 | float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b; |
421 | 0 | for (int i = 0; i < 32; ++i) { |
422 | 0 | const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; |
423 | 0 | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
424 | 0 | auto restored_y = result.color_dct_[1][i] * inv_dct_factor * |
425 | 0 | kChannelWeight[1] * inv_quant; |
426 | 0 | auto decorrelated = original.color_dct[c][i] - factor * restored_y; |
427 | 0 | result.color_dct_[c][i] = |
428 | 0 | to_int(decorrelated * dct_factor * quant / kChannelWeight[c]); |
429 | 0 | } |
430 | 0 | } |
431 | 0 | for (int i = 0; i < 32; ++i) { |
432 | 0 | const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; |
433 | 0 | result.sigma_dct_[i] = |
434 | 0 | to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]); |
435 | 0 | } |
436 | 0 | return result; |
437 | 0 | } |
438 | | |
439 | | Status QuantizedSpline::Dequantize(const Spline::Point& starting_point, |
440 | | const int32_t quantization_adjustment, |
441 | | const float y_to_x, const float y_to_b, |
442 | | const uint64_t image_size, |
443 | | uint64_t* total_estimated_area_reached, |
444 | 10.1k | Spline& result) const { |
445 | 10.1k | constexpr uint64_t kOne = static_cast<uint64_t>(1); |
446 | 10.1k | const uint64_t area_limit = |
447 | 10.1k | std::min(1024 * image_size + (kOne << 32), kOne << 42); |
448 | | |
449 | 10.1k | result.control_points.clear(); |
450 | 10.1k | result.control_points.reserve(control_points_.size() + 1); |
451 | 10.1k | float px = std::round(starting_point.x); |
452 | 10.1k | float py = std::round(starting_point.y); |
453 | 10.1k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py)); |
454 | 10.1k | int current_x = static_cast<int>(px); |
455 | 10.1k | int current_y = static_cast<int>(py); |
456 | 10.1k | result.control_points.emplace_back(static_cast<float>(current_x), |
457 | 10.1k | static_cast<float>(current_y)); |
458 | 10.1k | int current_delta_x = 0; |
459 | 10.1k | int current_delta_y = 0; |
460 | 10.1k | uint64_t manhattan_distance = 0; |
461 | 79.3k | for (const auto& point : control_points_) { |
462 | 79.3k | current_delta_x += point.first; |
463 | 79.3k | current_delta_y += point.second; |
464 | 79.3k | manhattan_distance += std::abs(current_delta_x) + std::abs(current_delta_y); |
465 | 79.3k | if (manhattan_distance > area_limit) { |
466 | 0 | return JXL_FAILURE("Too large manhattan_distance reached: %" PRIu64, |
467 | 0 | manhattan_distance); |
468 | 0 | } |
469 | 79.3k | JXL_RETURN_IF_ERROR( |
470 | 79.3k | ValidateSplinePointPos(current_delta_x, current_delta_y)); |
471 | 79.3k | current_x += current_delta_x; |
472 | 79.3k | current_y += current_delta_y; |
473 | 79.3k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y)); |
474 | 79.2k | result.control_points.emplace_back(static_cast<float>(current_x), |
475 | 79.2k | static_cast<float>(current_y)); |
476 | 79.2k | } |
477 | | |
478 | 10.1k | const auto inv_quant = InvAdjustedQuant(quantization_adjustment); |
479 | 40.6k | for (int c = 0; c < 3; ++c) { |
480 | 1.00M | for (int i = 0; i < 32; ++i) { |
481 | 976k | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
482 | 976k | result.color_dct[c][i] = |
483 | 976k | color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant; |
484 | 976k | } |
485 | 30.5k | } |
486 | 335k | for (int i = 0; i < 32; ++i) { |
487 | 325k | result.color_dct[0][i] += y_to_x * result.color_dct[1][i]; |
488 | 325k | result.color_dct[2][i] += y_to_b * result.color_dct[1][i]; |
489 | 325k | } |
490 | 10.1k | uint64_t width_estimate = 0; |
491 | | |
492 | 10.1k | uint64_t color[3] = {}; |
493 | 40.6k | for (int c = 0; c < 3; ++c) { |
494 | 1.00M | for (int i = 0; i < 32; ++i) { |
495 | 976k | color[c] += static_cast<uint64_t>( |
496 | 976k | std::ceil(inv_quant * std::abs(color_dct_[c][i]))); |
497 | 976k | } |
498 | 30.5k | } |
499 | 10.1k | color[0] += static_cast<uint64_t>(std::ceil(std::abs(y_to_x))) * color[1]; |
500 | 10.1k | color[2] += static_cast<uint64_t>(std::ceil(std::abs(y_to_b))) * color[1]; |
501 | | // This is not taking kChannelWeight into account, but up to constant factors |
502 | | // it gives an indication of the influence of the color values on the area |
503 | | // that will need to be rendered. |
504 | 10.1k | const uint64_t max_color = std::max({color[1], color[0], color[2]}); |
505 | 10.1k | uint64_t logcolor = |
506 | 10.1k | std::max(kOne, static_cast<uint64_t>(CeilLog2Nonzero(kOne + max_color))); |
507 | | |
508 | 10.1k | const float weight_limit = |
509 | 10.1k | std::ceil(std::sqrt((static_cast<float>(area_limit) / logcolor) / |
510 | 10.1k | std::max<size_t>(1, manhattan_distance))); |
511 | | |
512 | 335k | for (int i = 0; i < 32; ++i) { |
513 | 325k | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
514 | 325k | result.sigma_dct[i] = |
515 | 325k | sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant; |
516 | | // If we include the factor kChannelWeight[3]=.3333f here, we get a |
517 | | // realistic area estimate. We leave it out to simplify the calculations, |
518 | | // and understand that this way we underestimate the area by a factor of |
519 | | // 1/(0.3333*0.3333). This is taken into account in the limits below. |
520 | 325k | float weight_f = std::ceil(inv_quant * std::abs(sigma_dct_[i])); |
521 | 325k | uint64_t weight = |
522 | 325k | static_cast<uint64_t>(std::min(weight_limit, std::max(1.0f, weight_f))); |
523 | 325k | width_estimate += weight * weight * logcolor; |
524 | 325k | } |
525 | 10.1k | *total_estimated_area_reached += (width_estimate * manhattan_distance); |
526 | 10.1k | if (*total_estimated_area_reached > area_limit) { |
527 | 28 | return JXL_FAILURE("Too large total_estimated_area eached: %" PRIu64, |
528 | 28 | *total_estimated_area_reached); |
529 | 28 | } |
530 | | |
531 | 10.1k | return true; |
532 | 10.1k | } |
533 | | |
534 | | Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map, |
535 | | ANSSymbolReader* const decoder, |
536 | | BitReader* const br, |
537 | | const size_t max_control_points, |
538 | 71.5k | size_t* total_num_control_points) { |
539 | 71.5k | const size_t num_control_points = |
540 | 71.5k | decoder->ReadHybridUint(kNumControlPointsContext, br, context_map); |
541 | 71.5k | if (num_control_points > max_control_points) { |
542 | 16 | return JXL_FAILURE("Too many control points: %" PRIuS, num_control_points); |
543 | 16 | } |
544 | 71.4k | *total_num_control_points += num_control_points; |
545 | 71.4k | if (*total_num_control_points > max_control_points) { |
546 | 11 | return JXL_FAILURE("Too many control points: %" PRIuS, |
547 | 11 | *total_num_control_points); |
548 | 11 | } |
549 | 71.4k | control_points_.resize(num_control_points); |
550 | | // Maximal image dimension. |
551 | 71.4k | constexpr int64_t kDeltaLimit = 1u << 30; |
552 | 159k | for (std::pair<int64_t, int64_t>& control_point : control_points_) { |
553 | 159k | control_point.first = UnpackSigned( |
554 | 159k | decoder->ReadHybridUint(kControlPointsContext, br, context_map)); |
555 | 159k | control_point.second = UnpackSigned( |
556 | 159k | decoder->ReadHybridUint(kControlPointsContext, br, context_map)); |
557 | | // Check delta-deltas are not outrageous; it is not in spec, but there is |
558 | | // no reason to allow larger values. |
559 | 159k | if ((control_point.first >= kDeltaLimit) || |
560 | 159k | (control_point.first <= -kDeltaLimit) || |
561 | 159k | (control_point.second >= kDeltaLimit) || |
562 | 159k | (control_point.second <= -kDeltaLimit)) { |
563 | 10 | return JXL_FAILURE("Spline delta-delta is out of bounds"); |
564 | 10 | } |
565 | 159k | } |
566 | | |
567 | 285k | const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status { |
568 | 285k | constexpr int kWeirdNumber = std::numeric_limits<int>::min(); |
569 | 9.43M | for (int i = 0; i < 32; ++i) { |
570 | 9.14M | dct[i] = |
571 | 9.14M | UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map)); |
572 | 9.14M | if (dct[i] == kWeirdNumber) { |
573 | 7 | return JXL_FAILURE("The weird number in spline DCT"); |
574 | 7 | } |
575 | 9.14M | } |
576 | 285k | return true; |
577 | 285k | }; |
578 | 214k | for (auto& dct : color_dct_) { |
579 | 214k | JXL_RETURN_IF_ERROR(decode_dct(dct)); |
580 | 214k | } |
581 | 71.4k | JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_)); |
582 | 71.4k | return true; |
583 | 71.4k | } |
584 | | |
585 | 0 | void Splines::SetData(SplineDataView data) { |
586 | 0 | Clear(); |
587 | 0 | data_ = data; |
588 | 0 | } |
589 | | |
590 | 71.6k | void Splines::Clear() { |
591 | 71.6k | quantization_adjustment_ = 0; |
592 | 71.6k | splines_storage_.clear(); |
593 | 71.6k | starting_points_storage_.clear(); |
594 | 71.6k | data_ = {}; |
595 | 71.6k | segments_.clear(); |
596 | 71.6k | segment_indices_ = AlignedMemory(); |
597 | 71.6k | segment_y_start_ = AlignedMemory(); |
598 | 71.6k | } |
599 | | |
600 | 4.54k | Status Splines::Decode(jxl::BitReader* br, const size_t num_pixels) { |
601 | 4.54k | std::vector<uint8_t> context_map; |
602 | 4.54k | ANSCode code; |
603 | 4.54k | JXL_RETURN_IF_ERROR(DecodeHistograms(memory_manager_, br, kNumSplineContexts, |
604 | 4.54k | &code, &context_map)); |
605 | 9.03k | JXL_ASSIGN_OR_RETURN(ANSSymbolReader decoder, |
606 | 9.03k | ANSSymbolReader::Create(&code, br)); |
607 | 9.03k | size_t num_splines = |
608 | 9.03k | decoder.ReadHybridUint(kNumSplinesContext, br, context_map); |
609 | 9.03k | size_t max_control_points = std::min( |
610 | 9.03k | kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio); |
611 | 9.03k | if (num_splines > max_control_points || |
612 | 4.51k | num_splines + 1 > max_control_points) { |
613 | 8 | return JXL_FAILURE("Too many splines: %" PRIuS, num_splines); |
614 | 8 | } |
615 | 4.50k | num_splines++; |
616 | 4.50k | JXL_RETURN_IF_ERROR(DecodeAllStartingPoints( |
617 | 4.50k | &starting_points_storage_, br, &decoder, context_map, num_splines)); |
618 | | |
619 | 4.45k | quantization_adjustment_ = UnpackSigned( |
620 | 4.45k | decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map)); |
621 | | |
622 | 4.45k | splines_storage_.clear(); |
623 | 4.45k | splines_storage_.reserve(num_splines); |
624 | 4.45k | size_t num_control_points = num_splines; |
625 | 75.9k | for (size_t i = 0; i < num_splines; ++i) { |
626 | 71.5k | QuantizedSpline spline; |
627 | 71.5k | JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br, |
628 | 71.5k | max_control_points, &num_control_points)); |
629 | 71.4k | splines_storage_.push_back(std::move(spline)); |
630 | 71.4k | } |
631 | | |
632 | 4.41k | JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState()); |
633 | | |
634 | 4.41k | data_ = SplineDataView{Span<const QuantizedSpline>(splines_storage_), |
635 | 4.41k | Span<const Spline::Point>(starting_points_storage_)}; |
636 | | |
637 | 4.41k | if (!HasAny()) { |
638 | 0 | return JXL_FAILURE("Decoded splines but got none"); |
639 | 0 | } |
640 | | |
641 | 4.41k | return true; |
642 | 4.41k | } |
643 | | |
644 | 0 | void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect) const { |
645 | 0 | Apply</*add=*/true>(opsin, opsin_rect); |
646 | 0 | } |
647 | | void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
648 | | float* JXL_RESTRICT row_b, size_t y, size_t x0, |
649 | 215k | size_t x1) const { |
650 | 215k | ApplyToRow</*add=*/true>(row_x, row_y, row_b, y, x0, x1); |
651 | 215k | } |
652 | | |
653 | 2.68k | void Splines::SubtractFrom(Image3F* const opsin) const { |
654 | 2.68k | Apply</*add=*/false>(opsin, Rect(*opsin)); |
655 | 2.68k | } |
656 | | |
657 | | Status Splines::InitializeDrawCache(const size_t image_xsize, |
658 | | const size_t image_ysize, |
659 | 7.03k | const ColorCorrelation& color_correlation) { |
660 | | // TODO(veluca): avoid storing segments that are entirely outside image |
661 | | // boundaries. |
662 | 7.03k | segments_.clear(); |
663 | 7.03k | segment_indices_ = AlignedMemory(); |
664 | 7.03k | segment_y_start_ = AlignedMemory(); |
665 | 7.03k | std::vector<SplineSegmentSpan> segments_spans; |
666 | 7.03k | std::vector<Spline::Point> intermediate_points; |
667 | 7.03k | uint64_t total_estimated_area_reached = 0; |
668 | 7.03k | std::vector<Spline> splines; |
669 | 17.1k | for (size_t i = 0; i < data_.splines.size(); ++i) { |
670 | 10.1k | Spline spline; |
671 | 10.1k | JXL_RETURN_IF_ERROR(data_.splines[i].Dequantize( |
672 | 10.1k | data_.starting_points[i], quantization_adjustment_, |
673 | 10.1k | color_correlation.YtoXRatio(0), color_correlation.YtoBRatio(0), |
674 | 10.1k | image_xsize * image_ysize, &total_estimated_area_reached, spline)); |
675 | 10.1k | if (std::adjacent_find(spline.control_points.begin(), |
676 | 10.1k | spline.control_points.end()) != |
677 | 10.1k | spline.control_points.end()) { |
678 | | // Otherwise division by zero might occur. Once control points coincide, |
679 | | // the direction of curve is undefined... |
680 | 5 | return JXL_FAILURE( |
681 | 5 | "identical successive control points in spline %" PRIuS, i); |
682 | 5 | } |
683 | 10.1k | splines.push_back(spline); |
684 | 10.1k | } |
685 | | // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams. |
686 | 6.97k | if (total_estimated_area_reached > |
687 | 6.97k | std::min( |
688 | 6.97k | (8 * image_xsize * image_ysize + (static_cast<uint64_t>(1) << 25)), |
689 | 6.97k | (static_cast<uint64_t>(1) << 30))) { |
690 | 15 | JXL_WARNING( |
691 | 15 | "Large total_estimated_area_reached, expect slower decoding: %" PRIu64, |
692 | 15 | total_estimated_area_reached); |
693 | 15 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
694 | 15 | return JXL_FAILURE("Total spline area is too large"); |
695 | 15 | #endif |
696 | 15 | } |
697 | | |
698 | 10.0k | for (Spline& spline : splines) { |
699 | 10.0k | std::vector<std::pair<Spline::Point, float>> points_to_draw; |
700 | 19.4M | auto add_point = [&](const Spline::Point& point, const float multiplier) { |
701 | 19.4M | points_to_draw.emplace_back(point, multiplier); |
702 | 19.4M | }; |
703 | 10.0k | intermediate_points.clear(); |
704 | 10.0k | DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points); |
705 | 10.0k | JXL_RETURN_IF_ERROR( |
706 | 10.0k | ForEachEquallySpacedPoint(intermediate_points, add_point)); |
707 | 10.0k | const float arc_length = |
708 | 10.0k | (points_to_draw.size() - 2) * kDesiredRenderingDistance + |
709 | 10.0k | points_to_draw.back().second; |
710 | 10.0k | if (arc_length <= 0.f) { |
711 | | // This spline wouldn't have any effect. |
712 | 5.16k | continue; |
713 | 5.16k | } |
714 | 4.85k | HWY_DYNAMIC_DISPATCH(SegmentsFromPoints) |
715 | 4.85k | (image_ysize, spline, points_to_draw, arc_length, segments_, |
716 | 4.85k | segments_spans); |
717 | 4.85k | } |
718 | | |
719 | 6.96k | size_t segment_y_start_num_bytes = (image_ysize + 2) * sizeof(ptrdiff_t); |
720 | 6.96k | JXL_ASSIGN_OR_RETURN( |
721 | 6.96k | segment_y_start_, |
722 | 6.96k | AlignedMemory::Create(memory_manager_, segment_y_start_num_bytes)); |
723 | 6.96k | ptrdiff_t* segment_y_start = segment_y_start_.address<ptrdiff_t>(); |
724 | 6.96k | memset(segment_y_start, 0, segment_y_start_num_bytes); |
725 | 6.96k | ptrdiff_t* population = segment_y_start + 1; |
726 | 1.11M | for (const auto& segment_span : segments_spans) { |
727 | 1.11M | population[segment_span.start]++; |
728 | 1.11M | population[segment_span.end]--; |
729 | 1.11M | } |
730 | | // Turn to cumulative. |
731 | 6.96k | size_t total = 0; |
732 | 6.96k | ptrdiff_t coverage = 0; |
733 | 1.21M | for (size_t y = 0; y < image_ysize; y++) { |
734 | 1.20M | if (population[y] < 0) { |
735 | 26.5k | JXL_ENSURE(coverage >= -population[y]); |
736 | 26.5k | } |
737 | 1.20M | coverage += population[y]; |
738 | 1.20M | population[y] = total; |
739 | 1.20M | total += coverage; |
740 | 1.20M | } |
741 | | // population[0] == 0 (that is segment_y_start[1]) |
742 | | // We are going to place segments using population[y] as index for elements |
743 | | // of y-th line (and increment them); that way final value of population[y] |
744 | | // is the staring index for (y+1)-th line. Thus segment_y_start will contain |
745 | | // indices without an offset. |
746 | 6.96k | JXL_ASSIGN_OR_RETURN( |
747 | 6.96k | segment_indices_, |
748 | 6.96k | AlignedMemory::Create(memory_manager_, total * sizeof(size_t))); |
749 | 6.96k | size_t* segment_indices = segment_indices_.address<size_t>(); |
750 | 1.12M | for (size_t i = 0; i < segments_.size(); ++i) { |
751 | 1.11M | const auto& segment_span = segments_spans[i]; |
752 | 9.60M | for (size_t y = segment_span.start; y < segment_span.end; y++) { |
753 | 8.49M | segment_indices[population[y]++] = i; |
754 | 8.49M | } |
755 | 1.11M | } |
756 | | |
757 | 6.96k | return true; |
758 | 6.96k | } |
759 | | |
760 | | template <bool add> |
761 | | void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
762 | | float* JXL_RESTRICT row_b, size_t y, size_t x0, |
763 | 215k | size_t x1) const { |
764 | 215k | if (segments_.empty()) return; |
765 | 26.8k | HWY_DYNAMIC_DISPATCH(DrawSegments) |
766 | 26.8k | (row_x, row_y, row_b, y, x0, x1, add, segments_.data(), |
767 | 26.8k | segment_indices_.address<size_t>(), segment_y_start_.address<ptrdiff_t>()); |
768 | 26.8k | } void jxl::Splines::ApplyToRow<true>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const Line | Count | Source | 763 | 215k | size_t x1) const { | 764 | 215k | if (segments_.empty()) return; | 765 | 26.8k | HWY_DYNAMIC_DISPATCH(DrawSegments) | 766 | 26.8k | (row_x, row_y, row_b, y, x0, x1, add, segments_.data(), | 767 | 26.8k | segment_indices_.address<size_t>(), segment_y_start_.address<ptrdiff_t>()); | 768 | 26.8k | } |
Unexecuted instantiation: void jxl::Splines::ApplyToRow<false>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const |
769 | | |
770 | | template <bool add> |
771 | 2.68k | void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect) const { |
772 | 2.68k | if (segments_.empty()) return; |
773 | 0 | const size_t y0 = opsin_rect.y0(); |
774 | 0 | const size_t x0 = opsin_rect.x0(); |
775 | 0 | const size_t x1 = opsin_rect.x1(); |
776 | 0 | for (size_t y = 0; y < opsin_rect.ysize(); y++) { |
777 | 0 | ApplyToRow<add>(opsin->PlaneRow(0, y0 + y) + x0, |
778 | 0 | opsin->PlaneRow(1, y0 + y) + x0, |
779 | 0 | opsin->PlaneRow(2, y0 + y) + x0, y0 + y, x0, x1); |
780 | 0 | } |
781 | 0 | } Unexecuted instantiation: void jxl::Splines::Apply<true>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const void jxl::Splines::Apply<false>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const Line | Count | Source | 771 | 2.68k | void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect) const { | 772 | 2.68k | if (segments_.empty()) return; | 773 | 0 | const size_t y0 = opsin_rect.y0(); | 774 | 0 | const size_t x0 = opsin_rect.x0(); | 775 | 0 | const size_t x1 = opsin_rect.x1(); | 776 | 0 | for (size_t y = 0; y < opsin_rect.ysize(); y++) { | 777 | 0 | ApplyToRow<add>(opsin->PlaneRow(0, y0 + y) + x0, | 778 | 0 | opsin->PlaneRow(1, y0 + y) + x0, | 779 | 0 | opsin->PlaneRow(2, y0 + y) + x0, y0 + y, x0, x1); | 780 | 0 | } | 781 | 0 | } |
|
782 | | |
783 | | } // namespace jxl |
784 | | #endif // HWY_ONCE |