/src/libjxl/lib/jxl/splines.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/splines.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | |
10 | | #include <algorithm> |
11 | | #include <cinttypes> // PRIu64 |
12 | | #include <cmath> |
13 | | #include <limits> |
14 | | |
15 | | #include "lib/jxl/base/common.h" |
16 | | #include "lib/jxl/base/printf_macros.h" |
17 | | #include "lib/jxl/base/rect.h" |
18 | | #include "lib/jxl/base/status.h" |
19 | | #include "lib/jxl/chroma_from_luma.h" |
20 | | #include "lib/jxl/common.h" // JXL_HIGH_PRECISION |
21 | | #include "lib/jxl/dct_scales.h" |
22 | | #include "lib/jxl/dec_ans.h" |
23 | | #include "lib/jxl/dec_bit_reader.h" |
24 | | #include "lib/jxl/pack_signed.h" |
25 | | |
26 | | #undef HWY_TARGET_INCLUDE |
27 | | #define HWY_TARGET_INCLUDE "lib/jxl/splines.cc" |
28 | | #include <hwy/foreach_target.h> |
29 | | #include <hwy/highway.h> |
30 | | |
31 | | #include "lib/jxl/base/fast_math-inl.h" |
32 | | HWY_BEFORE_NAMESPACE(); |
33 | | namespace jxl { |
34 | | namespace HWY_NAMESPACE { |
35 | | namespace { |
36 | | |
37 | | // These templates are not found via ADL. |
38 | | using hwy::HWY_NAMESPACE::Mul; |
39 | | using hwy::HWY_NAMESPACE::MulAdd; |
40 | | using hwy::HWY_NAMESPACE::MulSub; |
41 | | using hwy::HWY_NAMESPACE::Sqrt; |
42 | | using hwy::HWY_NAMESPACE::Sub; |
43 | | |
44 | | // Given a set of DCT coefficients, this returns the result of performing cosine |
45 | | // interpolation on the original samples. |
46 | 57.8M | float ContinuousIDCT(const Dct32& dct, const float t) { |
47 | | // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of |
48 | | // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces |
49 | | // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform |
50 | | // treatment of all the coefficients. |
51 | 57.8M | constexpr float kMultipliers[32] = { |
52 | 57.8M | kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, |
53 | 57.8M | kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, |
54 | 57.8M | kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, |
55 | 57.8M | kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, |
56 | 57.8M | kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, |
57 | 57.8M | kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, |
58 | 57.8M | kPi / 32 * 30, kPi / 32 * 31, |
59 | 57.8M | }; |
60 | 57.8M | HWY_CAPPED(float, 32) df; |
61 | 57.8M | auto result = Zero(df); |
62 | 57.8M | const auto tandhalf = Set(df, t + 0.5f); |
63 | 1.90G | for (int i = 0; i < 32; i += Lanes(df)) { |
64 | 1.85G | auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); |
65 | 1.85G | auto cos = FastCosf(df, cos_arg); |
66 | 1.85G | auto local_res = Mul(LoadU(df, dct.data() + i), cos); |
67 | 1.85G | result = MulAdd(Set(df, kSqrt2), local_res, result); |
68 | 1.85G | } |
69 | 57.8M | return GetLane(SumOfLanes(df, result)); |
70 | 57.8M | } |
71 | | |
72 | | template <typename DF> |
73 | | void DrawSegment(DF df, const SplineSegment& segment, const bool add, |
74 | 26.1M | const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { |
75 | 26.1M | Rebind<int32_t, DF> di; |
76 | 26.1M | const auto inv_sigma = Set(df, segment.inv_sigma); |
77 | 26.1M | const auto half = Set(df, 0.5f); |
78 | 26.1M | const auto one_over_2s2 = Set(df, 0.353553391f); |
79 | 26.1M | const auto sigma_over_4_times_intensity = |
80 | 26.1M | Set(df, segment.sigma_over_4_times_intensity); |
81 | 26.1M | const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); |
82 | 26.1M | const auto dy = Set(df, y - segment.center_y); |
83 | 26.1M | const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); |
84 | 26.1M | const auto distance = Sqrt(sqd); |
85 | 26.1M | const auto one_dimensional_factor = |
86 | 26.1M | Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), |
87 | 26.1M | FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); |
88 | 26.1M | auto local_intensity = |
89 | 26.1M | Mul(sigma_over_4_times_intensity, |
90 | 26.1M | Mul(one_dimensional_factor, one_dimensional_factor)); |
91 | 104M | for (size_t c = 0; c < 3; ++c) { |
92 | 78.4M | const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); |
93 | 78.4M | const auto in = LoadU(df, rows[c] + x); |
94 | 78.4M | StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); |
95 | 78.4M | } |
96 | 26.1M | } |
97 | | |
98 | | void DrawSegment(const SplineSegment& segment, const bool add, const size_t y, |
99 | 1.07M | const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) { |
100 | 1.07M | ssize_t x = std::max<ssize_t>( |
101 | 1.07M | x0, std::llround(segment.center_x - segment.maximum_distance)); |
102 | | // one-past-the-end |
103 | 1.07M | x1 = std::min<ssize_t>( |
104 | 1.07M | x1, std::llround(segment.center_x + segment.maximum_distance) + 1); |
105 | 1.07M | HWY_FULL(float) df; |
106 | 27.2M | for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) { |
107 | 26.1M | DrawSegment(df, segment, add, y, x, rows); |
108 | 26.1M | } |
109 | 1.07M | for (; x < x1; ++x) { |
110 | 0 | DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows); |
111 | 0 | } |
112 | 1.07M | } |
113 | | |
114 | | void ComputeSegments(const Spline::Point& center, const float intensity, |
115 | | const float color[3], const float sigma, |
116 | | std::vector<SplineSegment>& segments, |
117 | 14.4M | std::vector<std::pair<size_t, size_t>>& segments_by_y) { |
118 | | // Sanity check sigma, inverse sigma and intensity |
119 | 14.4M | if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && |
120 | 14.4M | std::isfinite(intensity))) { |
121 | 147k | return; |
122 | 147k | } |
123 | 14.3M | #if JXL_HIGH_PRECISION |
124 | 14.3M | constexpr float kDistanceExp = 5; |
125 | | #else |
126 | | // About 30% faster. |
127 | | constexpr float kDistanceExp = 3; |
128 | | #endif |
129 | | // We cap from below colors to at least 0.01. |
130 | 14.3M | float max_color = 0.01f; |
131 | 57.2M | for (size_t c = 0; c < 3; c++) { |
132 | 42.9M | max_color = std::max(max_color, std::abs(color[c] * intensity)); |
133 | 42.9M | } |
134 | | // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops |
135 | | // below 10^-kDistanceExp. |
136 | 14.3M | const float maximum_distance = |
137 | 14.3M | std::sqrt(-2 * sigma * sigma * |
138 | 14.3M | (std::log(0.1) * kDistanceExp - std::log(max_color))); |
139 | 14.3M | SplineSegment segment; |
140 | 14.3M | segment.center_y = center.y; |
141 | 14.3M | segment.center_x = center.x; |
142 | 14.3M | memcpy(segment.color, color, sizeof(segment.color)); |
143 | 14.3M | segment.inv_sigma = 1.0f / sigma; |
144 | 14.3M | segment.sigma_over_4_times_intensity = .25f * sigma * intensity; |
145 | 14.3M | segment.maximum_distance = maximum_distance; |
146 | 14.3M | ssize_t y0 = std::llround(center.y - maximum_distance); |
147 | 14.3M | ssize_t y1 = |
148 | 14.3M | std::llround(center.y + maximum_distance) + 1; // one-past-the-end |
149 | 57.1M | for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) { |
150 | 42.8M | segments_by_y.emplace_back(y, segments.size()); |
151 | 42.8M | } |
152 | 14.3M | segments.push_back(segment); |
153 | 14.3M | } |
154 | | |
155 | | void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
156 | | float* JXL_RESTRICT row_b, size_t y, size_t x0, size_t x1, |
157 | | const bool add, const SplineSegment* segments, |
158 | | const size_t* segment_indices, |
159 | 49.7k | const size_t* segment_y_start) { |
160 | 49.7k | float* JXL_RESTRICT rows[3] = {row_x - x0, row_y - x0, row_b - x0}; |
161 | 1.12M | for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { |
162 | 1.07M | DrawSegment(segments[segment_indices[i]], add, y, x0, x1, rows); |
163 | 1.07M | } |
164 | 49.7k | } |
165 | | |
166 | | void SegmentsFromPoints( |
167 | | const Spline& spline, |
168 | | const std::vector<std::pair<Spline::Point, float>>& points_to_draw, |
169 | | const float arc_length, std::vector<SplineSegment>& segments, |
170 | 4.32k | std::vector<std::pair<size_t, size_t>>& segments_by_y) { |
171 | 4.32k | const float inv_arc_length = 1.0f / arc_length; |
172 | 4.32k | int k = 0; |
173 | 14.4M | for (const auto& point_to_draw : points_to_draw) { |
174 | 14.4M | const Spline::Point& point = point_to_draw.first; |
175 | 14.4M | const float multiplier = point_to_draw.second; |
176 | 14.4M | const float progress_along_arc = |
177 | 14.4M | std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); |
178 | 14.4M | ++k; |
179 | 14.4M | float color[3]; |
180 | 57.8M | for (size_t c = 0; c < 3; ++c) { |
181 | 43.3M | color[c] = |
182 | 43.3M | ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); |
183 | 43.3M | } |
184 | 14.4M | const float sigma = |
185 | 14.4M | ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); |
186 | 14.4M | ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y); |
187 | 14.4M | } |
188 | 4.32k | } |
189 | | } // namespace |
190 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
191 | | } // namespace HWY_NAMESPACE |
192 | | } // namespace jxl |
193 | | HWY_AFTER_NAMESPACE(); |
194 | | |
195 | | #if HWY_ONCE |
196 | | namespace jxl { |
197 | | HWY_EXPORT(SegmentsFromPoints); |
198 | | HWY_EXPORT(DrawSegments); |
199 | | |
200 | | namespace { |
201 | | |
202 | | // It is not in spec, but reasonable limit to avoid overflows. |
203 | | template <typename T> |
204 | 2.24M | Status ValidateSplinePointPos(const T& x, const T& y) { |
205 | 2.24M | constexpr T kSplinePosLimit = 1u << 23; |
206 | 2.24M | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || |
207 | 2.24M | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { |
208 | 190 | return JXL_FAILURE("Spline coordinates out of bounds"); |
209 | 190 | } |
210 | 2.24M | return true; |
211 | 2.24M | } splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<long>(long const&, long const&) Line | Count | Source | 204 | 2.15M | Status ValidateSplinePointPos(const T& x, const T& y) { | 205 | 2.15M | constexpr T kSplinePosLimit = 1u << 23; | 206 | 2.15M | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 207 | 2.15M | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 208 | 121 | return JXL_FAILURE("Spline coordinates out of bounds"); | 209 | 121 | } | 210 | 2.15M | return true; | 211 | 2.15M | } |
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<float>(float const&, float const&) Line | Count | Source | 204 | 21.2k | Status ValidateSplinePointPos(const T& x, const T& y) { | 205 | 21.2k | constexpr T kSplinePosLimit = 1u << 23; | 206 | 21.2k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 207 | 21.2k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 208 | 0 | return JXL_FAILURE("Spline coordinates out of bounds"); | 209 | 0 | } | 210 | 21.2k | return true; | 211 | 21.2k | } |
splines.cc:jxl::Status jxl::(anonymous namespace)::ValidateSplinePointPos<int>(int const&, int const&) Line | Count | Source | 204 | 66.1k | Status ValidateSplinePointPos(const T& x, const T& y) { | 205 | 66.1k | constexpr T kSplinePosLimit = 1u << 23; | 206 | 66.1k | if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || | 207 | 66.1k | (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { | 208 | 69 | return JXL_FAILURE("Spline coordinates out of bounds"); | 209 | 69 | } | 210 | 66.1k | return true; | 211 | 66.1k | } |
|
212 | | |
213 | | // Maximum number of spline control points per frame is |
214 | | // std::min(kMaxNumControlPoints, xsize * ysize / 2) |
215 | | constexpr size_t kMaxNumControlPoints = 1u << 20u; |
216 | | constexpr size_t kMaxNumControlPointsPerPixelRatio = 2; |
217 | | |
218 | 0 | float AdjustedQuant(const int32_t adjustment) { |
219 | 0 | return (adjustment >= 0) ? (1.f + .125f * adjustment) |
220 | 0 | : 1.f / (1.f - .125f * adjustment); |
221 | 0 | } |
222 | | |
223 | 21.1k | float InvAdjustedQuant(const int32_t adjustment) { |
224 | 21.1k | return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment) |
225 | 21.1k | : (1.f - .125f * adjustment); |
226 | 21.1k | } |
227 | | |
228 | | // X, Y, B, sigma. |
229 | | constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f}; |
230 | | |
231 | | Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points, |
232 | | BitReader* const br, ANSSymbolReader* reader, |
233 | | const std::vector<uint8_t>& context_map, |
234 | 2.53k | const size_t num_splines) { |
235 | 2.53k | points->clear(); |
236 | 2.53k | points->reserve(num_splines); |
237 | 2.53k | int64_t last_x = 0; |
238 | 2.53k | int64_t last_y = 0; |
239 | 2.15M | for (size_t i = 0; i < num_splines; i++) { |
240 | 2.15M | int64_t x = |
241 | 2.15M | reader->ReadHybridUint(kStartingPositionContext, br, context_map); |
242 | 2.15M | int64_t y = |
243 | 2.15M | reader->ReadHybridUint(kStartingPositionContext, br, context_map); |
244 | 2.15M | if (i != 0) { |
245 | 2.15M | x = UnpackSigned(x) + last_x; |
246 | 2.15M | y = UnpackSigned(y) + last_y; |
247 | 2.15M | } |
248 | 2.15M | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y)); |
249 | 2.15M | points->emplace_back(static_cast<float>(x), static_cast<float>(y)); |
250 | 2.15M | last_x = x; |
251 | 2.15M | last_y = y; |
252 | 2.15M | } |
253 | 2.41k | return true; |
254 | 2.53k | } |
255 | | |
256 | | struct Vector { |
257 | | float x, y; |
258 | 0 | Vector operator-() const { return {-x, -y}; } |
259 | 0 | Vector operator+(const Vector& other) const { |
260 | 0 | return {x + other.x, y + other.y}; |
261 | 0 | } |
262 | 14.7M | float SquaredNorm() const { return x * x + y * y; } |
263 | | }; |
264 | 16.2M | Vector operator*(const float k, const Vector& vec) { |
265 | 16.2M | return {k * vec.x, k * vec.y}; |
266 | 16.2M | } |
267 | | |
268 | 16.2M | Spline::Point operator+(const Spline::Point& p, const Vector& vec) { |
269 | 16.2M | return {p.x + vec.x, p.y + vec.y}; |
270 | 16.2M | } |
271 | 31.0M | Vector operator-(const Spline::Point& a, const Spline::Point& b) { |
272 | 31.0M | return {a.x - b.x, a.y - b.y}; |
273 | 31.0M | } |
274 | | |
275 | | // TODO(eustas): avoid making a copy of "points". |
276 | | void DrawCentripetalCatmullRomSpline(std::vector<Spline::Point> points, |
277 | 20.8k | std::vector<Spline::Point>& result) { |
278 | 20.8k | if (points.empty()) return; |
279 | 20.8k | if (points.size() == 1) { |
280 | 16.5k | result.push_back(points[0]); |
281 | 16.5k | return; |
282 | 16.5k | } |
283 | | // Number of points to compute between each control point. |
284 | 4.32k | static constexpr int kNumPoints = 16; |
285 | 4.32k | result.reserve((points.size() - 1) * kNumPoints + 1); |
286 | 4.32k | points.insert(points.begin(), points[0] + (points[0] - points[1])); |
287 | 4.32k | points.push_back(points[points.size() - 1] + |
288 | 4.32k | (points[points.size() - 1] - points[points.size() - 2])); |
289 | | // points has at least 4 elements at this point. |
290 | 24.7k | for (size_t start = 0; start < points.size() - 3; ++start) { |
291 | | // 4 of them are used, and we draw from p[1] to p[2]. |
292 | 20.4k | const Spline::Point* const p = &points[start]; |
293 | 20.4k | result.push_back(p[1]); |
294 | 20.4k | float d[3]; |
295 | 20.4k | float t[4]; |
296 | 20.4k | t[0] = 0; |
297 | 81.6k | for (int k = 0; k < 3; ++k) { |
298 | | // TODO(eustas): for each segment delta is calculated 3 times... |
299 | | // TODO(eustas): restrict d[k] with reasonable limit and spec it. |
300 | 61.2k | d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y)); |
301 | 61.2k | t[k + 1] = t[k] + d[k]; |
302 | 61.2k | } |
303 | 326k | for (int i = 1; i < kNumPoints; ++i) { |
304 | 306k | const float tt = d[0] + (static_cast<float>(i) / kNumPoints) * d[1]; |
305 | 306k | Spline::Point a[3]; |
306 | 1.22M | for (int k = 0; k < 3; ++k) { |
307 | | // TODO(eustas): reciprocal multiplication would be faster. |
308 | 918k | a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]); |
309 | 918k | } |
310 | 306k | Spline::Point b[2]; |
311 | 918k | for (int k = 0; k < 2; ++k) { |
312 | 612k | b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]); |
313 | 612k | } |
314 | 306k | result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0])); |
315 | 306k | } |
316 | 20.4k | } |
317 | 4.32k | result.push_back(points[points.size() - 2]); |
318 | 4.32k | } |
319 | | |
320 | | // Move along the line segments defined by `points`, `kDesiredRenderingDistance` |
321 | | // pixels at a time, and call `functor` with each point and the actual distance |
322 | | // to the previous point (which will always be kDesiredRenderingDistance except |
323 | | // possibly for the very last point). |
324 | | // TODO(eustas): this method always adds the last point, but never the first |
325 | | // (unless those are one); I believe both ends matter. |
326 | | template <typename Points, typename Functor> |
327 | 20.8k | Status ForEachEquallySpacedPoint(const Points& points, const Functor& functor) { |
328 | 20.8k | JXL_ENSURE(!points.empty()); |
329 | 20.8k | Spline::Point current = points.front(); |
330 | 20.8k | functor(current, kDesiredRenderingDistance); |
331 | 20.8k | auto next = points.begin(); |
332 | 14.4M | while (next != points.end()) { |
333 | 14.4M | const Spline::Point* previous = ¤t; |
334 | 14.4M | float arclength_from_previous = 0.f; |
335 | 14.8M | for (;;) { |
336 | 14.8M | if (next == points.end()) { |
337 | 20.8k | functor(*previous, arclength_from_previous); |
338 | 20.8k | return true; |
339 | 20.8k | } |
340 | 14.7M | const float arclength_to_next = |
341 | 14.7M | std::sqrt((*next - *previous).SquaredNorm()); |
342 | 14.7M | if (arclength_from_previous + arclength_to_next >= |
343 | 14.7M | kDesiredRenderingDistance) { |
344 | 14.4M | current = |
345 | 14.4M | *previous + ((kDesiredRenderingDistance - arclength_from_previous) / |
346 | 14.4M | arclength_to_next) * |
347 | 14.4M | (*next - *previous); |
348 | 14.4M | functor(current, kDesiredRenderingDistance); |
349 | 14.4M | break; |
350 | 14.4M | } |
351 | 347k | arclength_from_previous += arclength_to_next; |
352 | 347k | previous = &*next; |
353 | 347k | ++next; |
354 | 347k | } |
355 | 14.4M | } |
356 | 0 | return true; |
357 | 20.8k | } |
358 | | |
359 | | } // namespace |
360 | | |
361 | | StatusOr<QuantizedSpline> QuantizedSpline::Create( |
362 | | const Spline& original, const int32_t quantization_adjustment, |
363 | 0 | const float y_to_x, const float y_to_b) { |
364 | 0 | JXL_ENSURE(!original.control_points.empty()); |
365 | 0 | QuantizedSpline result; |
366 | 0 | result.control_points_.reserve(original.control_points.size() - 1); |
367 | 0 | const Spline::Point& starting_point = original.control_points.front(); |
368 | 0 | int previous_x = static_cast<int>(std::roundf(starting_point.x)); |
369 | 0 | int previous_y = static_cast<int>(std::roundf(starting_point.y)); |
370 | 0 | int previous_delta_x = 0; |
371 | 0 | int previous_delta_y = 0; |
372 | 0 | for (auto it = original.control_points.begin() + 1; |
373 | 0 | it != original.control_points.end(); ++it) { |
374 | 0 | const int new_x = static_cast<int>(std::roundf(it->x)); |
375 | 0 | const int new_y = static_cast<int>(std::roundf(it->y)); |
376 | 0 | const int new_delta_x = new_x - previous_x; |
377 | 0 | const int new_delta_y = new_y - previous_y; |
378 | 0 | result.control_points_.emplace_back(new_delta_x - previous_delta_x, |
379 | 0 | new_delta_y - previous_delta_y); |
380 | 0 | previous_delta_x = new_delta_x; |
381 | 0 | previous_delta_y = new_delta_y; |
382 | 0 | previous_x = new_x; |
383 | 0 | previous_y = new_y; |
384 | 0 | } |
385 | |
|
386 | 0 | const auto to_int = [](float v) -> int { |
387 | | // Maximal int representable with float. |
388 | 0 | constexpr float kMax = std::numeric_limits<int>::max() - 127; |
389 | 0 | constexpr float kMin = -kMax; |
390 | 0 | return static_cast<int>(std::roundf(Clamp1(v, kMin, kMax))); |
391 | 0 | }; |
392 | |
|
393 | 0 | const auto quant = AdjustedQuant(quantization_adjustment); |
394 | 0 | const auto inv_quant = InvAdjustedQuant(quantization_adjustment); |
395 | 0 | for (int c : {1, 0, 2}) { |
396 | 0 | float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b; |
397 | 0 | for (int i = 0; i < 32; ++i) { |
398 | 0 | const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; |
399 | 0 | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
400 | 0 | auto restored_y = result.color_dct_[1][i] * inv_dct_factor * |
401 | 0 | kChannelWeight[1] * inv_quant; |
402 | 0 | auto decorrelated = original.color_dct[c][i] - factor * restored_y; |
403 | 0 | result.color_dct_[c][i] = |
404 | 0 | to_int(decorrelated * dct_factor * quant / kChannelWeight[c]); |
405 | 0 | } |
406 | 0 | } |
407 | 0 | for (int i = 0; i < 32; ++i) { |
408 | 0 | const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; |
409 | 0 | result.sigma_dct_[i] = |
410 | 0 | to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]); |
411 | 0 | } |
412 | 0 | return result; |
413 | 0 | } |
414 | | |
415 | | Status QuantizedSpline::Dequantize(const Spline::Point& starting_point, |
416 | | const int32_t quantization_adjustment, |
417 | | const float y_to_x, const float y_to_b, |
418 | | const uint64_t image_size, |
419 | | uint64_t* total_estimated_area_reached, |
420 | 21.2k | Spline& result) const { |
421 | 21.2k | constexpr uint64_t kOne = static_cast<uint64_t>(1); |
422 | 21.2k | const uint64_t area_limit = |
423 | 21.2k | std::min(1024 * image_size + (kOne << 32), kOne << 42); |
424 | | |
425 | 21.2k | result.control_points.clear(); |
426 | 21.2k | result.control_points.reserve(control_points_.size() + 1); |
427 | 21.2k | float px = std::roundf(starting_point.x); |
428 | 21.2k | float py = std::roundf(starting_point.y); |
429 | 21.2k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py)); |
430 | 21.2k | int current_x = static_cast<int>(px); |
431 | 21.2k | int current_y = static_cast<int>(py); |
432 | 21.2k | result.control_points.emplace_back(static_cast<float>(current_x), |
433 | 21.2k | static_cast<float>(current_y)); |
434 | 21.2k | int current_delta_x = 0; |
435 | 21.2k | int current_delta_y = 0; |
436 | 21.2k | uint64_t manhattan_distance = 0; |
437 | 33.1k | for (const auto& point : control_points_) { |
438 | 33.1k | current_delta_x += point.first; |
439 | 33.1k | current_delta_y += point.second; |
440 | 33.1k | manhattan_distance += std::abs(current_delta_x) + std::abs(current_delta_y); |
441 | 33.1k | if (manhattan_distance > area_limit) { |
442 | 0 | return JXL_FAILURE("Too large manhattan_distance reached: %" PRIu64, |
443 | 0 | manhattan_distance); |
444 | 0 | } |
445 | 33.1k | JXL_RETURN_IF_ERROR( |
446 | 33.1k | ValidateSplinePointPos(current_delta_x, current_delta_y)); |
447 | 33.0k | current_x += current_delta_x; |
448 | 33.0k | current_y += current_delta_y; |
449 | 33.0k | JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y)); |
450 | 33.0k | result.control_points.emplace_back(static_cast<float>(current_x), |
451 | 33.0k | static_cast<float>(current_y)); |
452 | 33.0k | } |
453 | | |
454 | 21.1k | const auto inv_quant = InvAdjustedQuant(quantization_adjustment); |
455 | 84.7k | for (int c = 0; c < 3; ++c) { |
456 | 2.09M | for (int i = 0; i < 32; ++i) { |
457 | 2.03M | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
458 | 2.03M | result.color_dct[c][i] = |
459 | 2.03M | color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant; |
460 | 2.03M | } |
461 | 63.5k | } |
462 | 699k | for (int i = 0; i < 32; ++i) { |
463 | 678k | result.color_dct[0][i] += y_to_x * result.color_dct[1][i]; |
464 | 678k | result.color_dct[2][i] += y_to_b * result.color_dct[1][i]; |
465 | 678k | } |
466 | 21.1k | uint64_t width_estimate = 0; |
467 | | |
468 | 21.1k | uint64_t color[3] = {}; |
469 | 84.7k | for (int c = 0; c < 3; ++c) { |
470 | 2.09M | for (int i = 0; i < 32; ++i) { |
471 | 2.03M | color[c] += static_cast<uint64_t>( |
472 | 2.03M | std::ceil(inv_quant * std::abs(color_dct_[c][i]))); |
473 | 2.03M | } |
474 | 63.5k | } |
475 | 21.1k | color[0] += static_cast<uint64_t>(std::ceil(std::abs(y_to_x))) * color[1]; |
476 | 21.1k | color[2] += static_cast<uint64_t>(std::ceil(std::abs(y_to_b))) * color[1]; |
477 | | // This is not taking kChannelWeight into account, but up to constant factors |
478 | | // it gives an indication of the influence of the color values on the area |
479 | | // that will need to be rendered. |
480 | 21.1k | const uint64_t max_color = std::max({color[1], color[0], color[2]}); |
481 | 21.1k | uint64_t logcolor = |
482 | 21.1k | std::max(kOne, static_cast<uint64_t>(CeilLog2Nonzero(kOne + max_color))); |
483 | | |
484 | 21.1k | const float weight_limit = |
485 | 21.1k | std::ceil(std::sqrt((static_cast<float>(area_limit) / logcolor) / |
486 | 21.1k | std::max<size_t>(1, manhattan_distance))); |
487 | | |
488 | 699k | for (int i = 0; i < 32; ++i) { |
489 | 678k | const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; |
490 | 678k | result.sigma_dct[i] = |
491 | 678k | sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant; |
492 | | // If we include the factor kChannelWeight[3]=.3333f here, we get a |
493 | | // realistic area estimate. We leave it out to simplify the calculations, |
494 | | // and understand that this way we underestimate the area by a factor of |
495 | | // 1/(0.3333*0.3333). This is taken into account in the limits below. |
496 | 678k | float weight_f = std::ceil(inv_quant * std::abs(sigma_dct_[i])); |
497 | 678k | uint64_t weight = |
498 | 678k | static_cast<uint64_t>(std::min(weight_limit, std::max(1.0f, weight_f))); |
499 | 678k | width_estimate += weight * weight * logcolor; |
500 | 678k | } |
501 | 21.1k | *total_estimated_area_reached += (width_estimate * manhattan_distance); |
502 | 21.1k | if (*total_estimated_area_reached > area_limit) { |
503 | 56 | return JXL_FAILURE("Too large total_estimated_area eached: %" PRIu64, |
504 | 56 | *total_estimated_area_reached); |
505 | 56 | } |
506 | | |
507 | 21.1k | return true; |
508 | 21.1k | } |
509 | | |
510 | | Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map, |
511 | | ANSSymbolReader* const decoder, |
512 | | BitReader* const br, |
513 | | const size_t max_control_points, |
514 | 1.50M | size_t* total_num_control_points) { |
515 | 1.50M | const size_t num_control_points = |
516 | 1.50M | decoder->ReadHybridUint(kNumControlPointsContext, br, context_map); |
517 | 1.50M | if (num_control_points > max_control_points) { |
518 | 53 | return JXL_FAILURE("Too many control points: %" PRIuS, num_control_points); |
519 | 53 | } |
520 | 1.50M | *total_num_control_points += num_control_points; |
521 | 1.50M | if (*total_num_control_points > max_control_points) { |
522 | 24 | return JXL_FAILURE("Too many control points: %" PRIuS, |
523 | 24 | *total_num_control_points); |
524 | 24 | } |
525 | 1.50M | control_points_.resize(num_control_points); |
526 | | // Maximal image dimension. |
527 | 1.50M | constexpr int64_t kDeltaLimit = 1u << 30; |
528 | 2.24M | for (std::pair<int64_t, int64_t>& control_point : control_points_) { |
529 | 2.24M | control_point.first = UnpackSigned( |
530 | 2.24M | decoder->ReadHybridUint(kControlPointsContext, br, context_map)); |
531 | 2.24M | control_point.second = UnpackSigned( |
532 | 2.24M | decoder->ReadHybridUint(kControlPointsContext, br, context_map)); |
533 | | // Check delta-deltas are not outrageous; it is not in spec, but there is |
534 | | // no reason to allow larger values. |
535 | 2.24M | if ((control_point.first >= kDeltaLimit) || |
536 | 2.24M | (control_point.first <= -kDeltaLimit) || |
537 | 2.24M | (control_point.second >= kDeltaLimit) || |
538 | 2.24M | (control_point.second <= -kDeltaLimit)) { |
539 | 72 | return JXL_FAILURE("Spline delta-delta is out of bounds"); |
540 | 72 | } |
541 | 2.24M | } |
542 | | |
543 | 6.03M | const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status { |
544 | 6.03M | constexpr int kWeirdNumber = std::numeric_limits<int>::min(); |
545 | 199M | for (int i = 0; i < 32; ++i) { |
546 | 193M | dct[i] = |
547 | 193M | UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map)); |
548 | 193M | if (dct[i] == kWeirdNumber) { |
549 | 6 | return JXL_FAILURE("The weird number in spline DCT"); |
550 | 6 | } |
551 | 193M | } |
552 | 6.03M | return true; |
553 | 6.03M | }; |
554 | 4.52M | for (auto& dct : color_dct_) { |
555 | 4.52M | JXL_RETURN_IF_ERROR(decode_dct(dct)); |
556 | 4.52M | } |
557 | 1.50M | JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_)); |
558 | 1.50M | return true; |
559 | 1.50M | } |
560 | | |
561 | 43.1k | void Splines::Clear() { |
562 | 43.1k | quantization_adjustment_ = 0; |
563 | 43.1k | splines_.clear(); |
564 | 43.1k | starting_points_.clear(); |
565 | 43.1k | segments_.clear(); |
566 | 43.1k | segment_indices_.clear(); |
567 | 43.1k | segment_y_start_.clear(); |
568 | 43.1k | } |
569 | | |
570 | | Status Splines::Decode(JxlMemoryManager* memory_manager, jxl::BitReader* br, |
571 | 2.56k | const size_t num_pixels) { |
572 | 2.56k | std::vector<uint8_t> context_map; |
573 | 2.56k | ANSCode code; |
574 | 2.56k | JXL_RETURN_IF_ERROR(DecodeHistograms(memory_manager, br, kNumSplineContexts, |
575 | 2.56k | &code, &context_map)); |
576 | 5.08k | JXL_ASSIGN_OR_RETURN(ANSSymbolReader decoder, |
577 | 5.08k | ANSSymbolReader::Create(&code, br)); |
578 | 5.08k | size_t num_splines = |
579 | 5.08k | decoder.ReadHybridUint(kNumSplinesContext, br, context_map); |
580 | 5.08k | size_t max_control_points = std::min( |
581 | 5.08k | kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio); |
582 | 5.08k | if (num_splines > max_control_points || |
583 | 2.54k | num_splines + 1 > max_control_points) { |
584 | 6 | return JXL_FAILURE("Too many splines: %" PRIuS, num_splines); |
585 | 6 | } |
586 | 2.53k | num_splines++; |
587 | 2.53k | JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder, |
588 | 2.53k | context_map, num_splines)); |
589 | | |
590 | 2.41k | quantization_adjustment_ = UnpackSigned( |
591 | 2.41k | decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map)); |
592 | | |
593 | 2.41k | splines_.clear(); |
594 | 2.41k | splines_.reserve(num_splines); |
595 | 2.41k | size_t num_control_points = num_splines; |
596 | 1.51M | for (size_t i = 0; i < num_splines; ++i) { |
597 | 1.50M | QuantizedSpline spline; |
598 | 1.50M | JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br, |
599 | 1.50M | max_control_points, &num_control_points)); |
600 | 1.50M | splines_.push_back(std::move(spline)); |
601 | 1.50M | } |
602 | | |
603 | 2.25k | JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState()); |
604 | | |
605 | 2.25k | if (!HasAny()) { |
606 | 0 | return JXL_FAILURE("Decoded splines but got none"); |
607 | 0 | } |
608 | | |
609 | 2.25k | return true; |
610 | 2.25k | } |
611 | | |
612 | 0 | void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect) const { |
613 | 0 | Apply</*add=*/true>(opsin, opsin_rect); |
614 | 0 | } |
615 | | void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
616 | | float* JXL_RESTRICT row_b, size_t y, size_t x0, |
617 | 53.8k | size_t x1) const { |
618 | 53.8k | ApplyToRow</*add=*/true>(row_x, row_y, row_b, y, x0, x1); |
619 | 53.8k | } |
620 | | |
621 | 0 | void Splines::SubtractFrom(Image3F* const opsin) const { |
622 | 0 | Apply</*add=*/false>(opsin, Rect(*opsin)); |
623 | 0 | } |
624 | | |
625 | | Status Splines::InitializeDrawCache(const size_t image_xsize, |
626 | | const size_t image_ysize, |
627 | 2.16k | const ColorCorrelation& color_correlation) { |
628 | | // TODO(veluca): avoid storing segments that are entirely outside image |
629 | | // boundaries. |
630 | 2.16k | segments_.clear(); |
631 | 2.16k | segment_indices_.clear(); |
632 | 2.16k | segment_y_start_.clear(); |
633 | 2.16k | std::vector<std::pair<size_t, size_t>> segments_by_y; |
634 | 2.16k | std::vector<Spline::Point> intermediate_points; |
635 | 2.16k | uint64_t total_estimated_area_reached = 0; |
636 | 2.16k | std::vector<Spline> splines; |
637 | 23.2k | for (size_t i = 0; i < splines_.size(); ++i) { |
638 | 21.2k | Spline spline; |
639 | 21.2k | JXL_RETURN_IF_ERROR(splines_[i].Dequantize( |
640 | 21.2k | starting_points_[i], quantization_adjustment_, |
641 | 21.2k | color_correlation.YtoXRatio(0), color_correlation.YtoBRatio(0), |
642 | 21.2k | image_xsize * image_ysize, &total_estimated_area_reached, spline)); |
643 | 21.1k | if (std::adjacent_find(spline.control_points.begin(), |
644 | 21.1k | spline.control_points.end()) != |
645 | 21.1k | spline.control_points.end()) { |
646 | | // Otherwise division by zero might occur. Once control points coincide, |
647 | | // the direction of curve is undefined... |
648 | 26 | return JXL_FAILURE( |
649 | 26 | "identical successive control points in spline %" PRIuS, i); |
650 | 26 | } |
651 | 21.1k | splines.push_back(spline); |
652 | 21.1k | } |
653 | | // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams. |
654 | 2.01k | if (total_estimated_area_reached > |
655 | 2.01k | std::min( |
656 | 2.01k | (8 * image_xsize * image_ysize + (static_cast<uint64_t>(1) << 25)), |
657 | 2.01k | (static_cast<uint64_t>(1) << 30))) { |
658 | 12 | JXL_WARNING( |
659 | 12 | "Large total_estimated_area_reached, expect slower decoding: %" PRIu64, |
660 | 12 | total_estimated_area_reached); |
661 | 12 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
662 | 12 | return JXL_FAILURE("Total spline area is too large"); |
663 | 12 | #endif |
664 | 12 | } |
665 | | |
666 | 20.8k | for (Spline& spline : splines) { |
667 | 20.8k | std::vector<std::pair<Spline::Point, float>> points_to_draw; |
668 | 14.4M | auto add_point = [&](const Spline::Point& point, const float multiplier) { |
669 | 14.4M | points_to_draw.emplace_back(point, multiplier); |
670 | 14.4M | }; |
671 | 20.8k | intermediate_points.clear(); |
672 | 20.8k | DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points); |
673 | 20.8k | JXL_RETURN_IF_ERROR( |
674 | 20.8k | ForEachEquallySpacedPoint(intermediate_points, add_point)); |
675 | 20.8k | const float arc_length = |
676 | 20.8k | (points_to_draw.size() - 2) * kDesiredRenderingDistance + |
677 | 20.8k | points_to_draw.back().second; |
678 | 20.8k | if (arc_length <= 0.f) { |
679 | | // This spline wouldn't have any effect. |
680 | 16.5k | continue; |
681 | 16.5k | } |
682 | 4.32k | HWY_DYNAMIC_DISPATCH(SegmentsFromPoints) |
683 | 4.32k | (spline, points_to_draw, arc_length, segments_, segments_by_y); |
684 | 4.32k | } |
685 | | |
686 | | // TODO(eustas): consider linear sorting here. |
687 | 2.00k | std::sort(segments_by_y.begin(), segments_by_y.end()); |
688 | 2.00k | segment_indices_.resize(segments_by_y.size()); |
689 | 2.00k | segment_y_start_.resize(image_ysize + 1); |
690 | 42.8M | for (size_t i = 0; i < segments_by_y.size(); i++) { |
691 | 42.8M | segment_indices_[i] = segments_by_y[i].second; |
692 | 42.8M | size_t y = segments_by_y[i].first; |
693 | 42.8M | if (y < image_ysize) { |
694 | 3.32M | segment_y_start_[y + 1]++; |
695 | 3.32M | } |
696 | 42.8M | } |
697 | 8.86M | for (size_t y = 0; y < image_ysize; y++) { |
698 | 8.85M | segment_y_start_[y + 1] += segment_y_start_[y]; |
699 | 8.85M | } |
700 | 2.00k | return true; |
701 | 2.00k | } |
702 | | |
703 | | template <bool add> |
704 | | void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, |
705 | | float* JXL_RESTRICT row_b, size_t y, size_t x0, |
706 | 53.8k | size_t x1) const { |
707 | 53.8k | if (segments_.empty()) return; |
708 | 49.7k | HWY_DYNAMIC_DISPATCH(DrawSegments) |
709 | 49.7k | (row_x, row_y, row_b, y, x0, x1, add, segments_.data(), |
710 | 49.7k | segment_indices_.data(), segment_y_start_.data()); |
711 | 49.7k | } void jxl::Splines::ApplyToRow<true>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const Line | Count | Source | 706 | 53.8k | size_t x1) const { | 707 | 53.8k | if (segments_.empty()) return; | 708 | 49.7k | HWY_DYNAMIC_DISPATCH(DrawSegments) | 709 | 49.7k | (row_x, row_y, row_b, y, x0, x1, add, segments_.data(), | 710 | 49.7k | segment_indices_.data(), segment_y_start_.data()); | 711 | 49.7k | } |
Unexecuted instantiation: void jxl::Splines::ApplyToRow<false>(float*, float*, float*, unsigned long, unsigned long, unsigned long) const |
712 | | |
713 | | template <bool add> |
714 | 0 | void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect) const { |
715 | 0 | if (segments_.empty()) return; |
716 | 0 | const size_t y0 = opsin_rect.y0(); |
717 | 0 | const size_t x0 = opsin_rect.x0(); |
718 | 0 | const size_t x1 = opsin_rect.x1(); |
719 | 0 | for (size_t y = 0; y < opsin_rect.ysize(); y++) { |
720 | 0 | ApplyToRow<add>(opsin->PlaneRow(0, y0 + y) + x0, |
721 | 0 | opsin->PlaneRow(1, y0 + y) + x0, |
722 | 0 | opsin->PlaneRow(2, y0 + y) + x0, y0 + y, x0, x1); |
723 | 0 | } |
724 | 0 | } Unexecuted instantiation: void jxl::Splines::Apply<true>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const Unexecuted instantiation: void jxl::Splines::Apply<false>(jxl::Image3<float>*, jxl::RectT<unsigned long> const&) const |
725 | | |
726 | | } // namespace jxl |
727 | | #endif // HWY_ONCE |