/src/libjxl/lib/jxl/convolve_slow.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include <atomic> |
7 | | |
8 | | #include "lib/jxl/base/rect.h" |
9 | | #include "lib/jxl/base/status.h" |
10 | | #include "lib/jxl/convolve-inl.h" |
11 | | #include "lib/jxl/convolve.h" |
12 | | |
13 | | namespace jxl { |
14 | | |
15 | | //------------------------------------------------------------------------------ |
16 | | // Kernels |
17 | | |
// Expands to four comma-separated copies of `value`, i.e. one group of four
// identical initializers; useful as input to LoadDup128.
#define JXL_REP4(value) value, value, value, value
20 | | |
21 | | // Concentrates energy in low-frequency components (e.g. for antialiasing). |
22 | 0 | const WeightsSymmetric3& WeightsSymmetric3Lowpass() { |
23 | | // Computed by research/convolve_weights.py's cubic spline approximations of |
24 | | // prolate spheroidal wave functions. |
25 | 0 | constexpr float w0 = 0.36208932f; |
26 | 0 | constexpr float w1 = 0.12820096f; |
27 | 0 | constexpr float w2 = 0.03127668f; |
28 | 0 | static constexpr WeightsSymmetric3 weights = { |
29 | 0 | {JXL_REP4(w0)}, {JXL_REP4(w1)}, {JXL_REP4(w2)}}; |
30 | 0 | return weights; |
31 | 0 | } |
32 | | |
33 | 0 | const WeightsSeparable5& WeightsSeparable5Lowpass() { |
34 | 0 | constexpr float w0 = 0.41714928f; |
35 | 0 | constexpr float w1 = 0.25539268f; |
36 | 0 | constexpr float w2 = 0.03603267f; |
37 | 0 | static constexpr WeightsSeparable5 weights = { |
38 | 0 | {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, |
39 | 0 | {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; |
40 | 0 | return weights; |
41 | 0 | } |
42 | | |
43 | 0 | const WeightsSymmetric5& WeightsSymmetric5Lowpass() { |
44 | 0 | static constexpr WeightsSymmetric5 weights = { |
45 | 0 | {JXL_REP4(0.1740135f)}, {JXL_REP4(0.1065369f)}, {JXL_REP4(0.0150310f)}, |
46 | 0 | {JXL_REP4(0.0652254f)}, {JXL_REP4(0.0012984f)}, {JXL_REP4(0.0092025f)}}; |
47 | 0 | return weights; |
48 | 0 | } |
49 | | |
50 | 0 | const WeightsSeparable5& WeightsSeparable5Gaussian1() { |
51 | 0 | constexpr float w0 = 0.38774f; |
52 | 0 | constexpr float w1 = 0.24477f; |
53 | 0 | constexpr float w2 = 0.06136f; |
54 | 0 | static constexpr WeightsSeparable5 weights = { |
55 | 0 | {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, |
56 | 0 | {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; |
57 | 0 | return weights; |
58 | 0 | } |
59 | | |
60 | 0 | const WeightsSeparable5& WeightsSeparable5Gaussian2() { |
61 | 0 | constexpr float w0 = 0.250301f; |
62 | 0 | constexpr float w1 = 0.221461f; |
63 | 0 | constexpr float w2 = 0.153388f; |
64 | 0 | static constexpr WeightsSeparable5 weights = { |
65 | 0 | {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, |
66 | 0 | {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; |
67 | 0 | return weights; |
68 | 0 | } |
69 | | |
70 | | #undef JXL_REP4 |
71 | | |
72 | | //------------------------------------------------------------------------------ |
73 | | // Slow |
74 | | |
75 | | namespace { |
76 | | |
77 | | template <class WrapX, class WrapY> |
78 | | float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy, |
79 | | const int64_t xsize, const int64_t ysize, |
80 | 0 | const WeightsSymmetric3& weights) { |
81 | 0 | float sum = 0.0f; |
82 | | |
83 | | // ix: image; kx: kernel |
84 | 0 | for (int64_t ky = -1; ky <= 1; ky++) { |
85 | 0 | const int64_t y = WrapY()(iy + ky, ysize); |
86 | 0 | const float* JXL_RESTRICT row_in = in.ConstRow(static_cast<size_t>(y)); |
87 | |
|
88 | 0 | const float wc = ky == 0 ? weights.c[0] : weights.r[0]; |
89 | 0 | const float wlr = ky == 0 ? weights.r[0] : weights.d[0]; |
90 | |
|
91 | 0 | const int64_t xm1 = WrapX()(ix - 1, xsize); |
92 | 0 | const int64_t xp1 = WrapX()(ix + 1, xsize); |
93 | 0 | sum += row_in[ix] * wc + (row_in[xm1] + row_in[xp1]) * wlr; |
94 | 0 | } |
95 | 0 | return sum; |
96 | 0 | } Unexecuted instantiation: convolve_slow.cc:float jxl::(anonymous namespace)::SlowSymmetric3Pixel<jxl::WrapMirror, jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, long, jxl::WeightsSymmetric3 const&) Unexecuted instantiation: convolve_slow.cc:float jxl::(anonymous namespace)::SlowSymmetric3Pixel<jxl::WrapUnchanged, jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, long, jxl::WeightsSymmetric3 const&) Unexecuted instantiation: convolve_slow.cc:float jxl::(anonymous namespace)::SlowSymmetric3Pixel<jxl::WrapMirror, jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, long, jxl::WeightsSymmetric3 const&) Unexecuted instantiation: convolve_slow.cc:float jxl::(anonymous namespace)::SlowSymmetric3Pixel<jxl::WrapUnchanged, jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, long, jxl::WeightsSymmetric3 const&) |
97 | | |
98 | | template <class WrapY> |
99 | | void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize, |
100 | | const int64_t ysize, const WeightsSymmetric3& weights, |
101 | 0 | float* JXL_RESTRICT row_out) { |
102 | 0 | row_out[0] = |
103 | 0 | SlowSymmetric3Pixel<WrapMirror, WrapY>(in, 0, iy, xsize, ysize, weights); |
104 | 0 | for (int64_t ix = 1; ix < xsize - 1; ix++) { |
105 | 0 | row_out[ix] = SlowSymmetric3Pixel<WrapUnchanged, WrapY>(in, ix, iy, xsize, |
106 | 0 | ysize, weights); |
107 | 0 | } |
108 | 0 | { |
109 | 0 | const int64_t ix = xsize - 1; |
110 | 0 | row_out[ix] = SlowSymmetric3Pixel<WrapMirror, WrapY>(in, ix, iy, xsize, |
111 | 0 | ysize, weights); |
112 | 0 | } |
113 | 0 | } Unexecuted instantiation: convolve_slow.cc:void jxl::(anonymous namespace)::SlowSymmetric3Row<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric3 const&, float*) Unexecuted instantiation: convolve_slow.cc:void jxl::(anonymous namespace)::SlowSymmetric3Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric3 const&, float*) |
114 | | |
115 | | } // namespace |
116 | | |
117 | | Status SlowSymmetric3(const ImageF& in, const Rect& rect, |
118 | | const WeightsSymmetric3& weights, ThreadPool* pool, |
119 | 0 | ImageF* JXL_RESTRICT out) { |
120 | 0 | const int64_t xsize = static_cast<int64_t>(rect.xsize()); |
121 | 0 | const int64_t ysize = static_cast<int64_t>(rect.ysize()); |
122 | 0 | const int64_t kRadius = 1; |
123 | |
|
124 | 0 | const auto process_row = [&](const uint32_t task, |
125 | 0 | size_t /*thread*/) -> Status { |
126 | 0 | const int64_t iy = task; |
127 | 0 | float* JXL_RESTRICT out_row = out->Row(static_cast<size_t>(iy)); |
128 | |
|
129 | 0 | if (iy < kRadius || iy >= ysize - kRadius) { |
130 | 0 | SlowSymmetric3Row<WrapMirror>(in, iy, xsize, ysize, weights, out_row); |
131 | 0 | } else { |
132 | 0 | SlowSymmetric3Row<WrapUnchanged>(in, iy, xsize, ysize, weights, out_row); |
133 | 0 | } |
134 | 0 | return true; |
135 | 0 | }; |
136 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
137 | 0 | ThreadPool::NoInit, process_row, |
138 | 0 | "SlowSymmetric3")); |
139 | 0 | return true; |
140 | 0 | } |
141 | | |
142 | | namespace { |
143 | | |
144 | | // Separable kernels, any radius. |
145 | | StatusOr<float> SlowSeparablePixel(const ImageF& in, const Rect& rect, |
146 | | const int64_t x, const int64_t y, |
147 | | const int64_t radius, |
148 | | const float* JXL_RESTRICT horz_weights, |
149 | 0 | const float* JXL_RESTRICT vert_weights) { |
150 | 0 | const size_t xsize = in.xsize(); |
151 | 0 | const size_t ysize = in.ysize(); |
152 | 0 | const WrapMirror wrap; |
153 | |
|
154 | 0 | float mul = 0.0f; |
155 | 0 | for (int dy = -radius; dy <= radius; ++dy) { |
156 | 0 | const float wy = vert_weights[std::abs(dy) * 4]; |
157 | 0 | const size_t sy = wrap(rect.y0() + y + dy, ysize); |
158 | 0 | JXL_ENSURE(sy < ysize); |
159 | 0 | const float* const JXL_RESTRICT row = in.ConstRow(sy); |
160 | 0 | for (int dx = -radius; dx <= radius; ++dx) { |
161 | 0 | const float wx = horz_weights[std::abs(dx) * 4]; |
162 | 0 | const size_t sx = wrap(rect.x0() + x + dx, xsize); |
163 | 0 | JXL_ENSURE(sx < xsize); |
164 | 0 | mul += row[sx] * wx * wy; |
165 | 0 | } |
166 | 0 | } |
167 | 0 | return mul; |
168 | 0 | } |
169 | | |
170 | | template <int R, typename Weights> |
171 | | Status SlowSeparable(const ImageF& in, const Rect& in_rect, |
172 | | const Weights& weights, ThreadPool* pool, ImageF* out, |
173 | 0 | const Rect& out_rect) { |
174 | 0 | JXL_ENSURE(in_rect.xsize() == out_rect.xsize()); |
175 | 0 | JXL_ENSURE(in_rect.ysize() == out_rect.ysize()); |
176 | 0 | JXL_ENSURE(in_rect.IsInside(Rect(in))); |
177 | 0 | JXL_ENSURE(out_rect.IsInside(Rect(*out))); |
178 | 0 | const float* horz_weights = &weights.horz[0]; |
179 | 0 | const float* vert_weights = &weights.vert[0]; |
180 | |
|
181 | 0 | const auto process_row = [&](const uint32_t task, |
182 | 0 | size_t /*thread*/) -> Status { |
183 | 0 | const int64_t y = task; |
184 | |
|
185 | 0 | float* const JXL_RESTRICT row_out = out_rect.Row(out, y); |
186 | 0 | for (size_t x = 0; x < in_rect.xsize(); ++x) { |
187 | 0 | JXL_ASSIGN_OR_RETURN(row_out[x], |
188 | 0 | SlowSeparablePixel(in, in_rect, x, y, /*radius=*/R, |
189 | 0 | horz_weights, vert_weights)); |
190 | 0 | } |
191 | 0 | return true; |
192 | 0 | }; |
193 | 0 | const size_t ysize = in_rect.ysize(); |
194 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
195 | 0 | ThreadPool::NoInit, process_row, |
196 | 0 | "SlowSeparable")); |
197 | 0 | return true; |
198 | 0 | } |
199 | | |
200 | | } // namespace |
201 | | |
202 | | Status SlowSeparable5(const ImageF& in, const Rect& in_rect, |
203 | | const WeightsSeparable5& weights, ThreadPool* pool, |
204 | 0 | ImageF* out, const Rect& out_rect) { |
205 | 0 | return SlowSeparable<2>(in, in_rect, weights, pool, out, out_rect); |
206 | 0 | } |
207 | | |
208 | | } // namespace jxl |