/src/libjxl/lib/jxl/convolve_symmetric5.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/base/status.h" |
7 | | #include "lib/jxl/convolve.h" |
8 | | |
9 | | #undef HWY_TARGET_INCLUDE |
10 | | #define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric5.cc" |
11 | | #include <hwy/foreach_target.h> |
12 | | #include <hwy/highway.h> |
13 | | |
14 | | #include "lib/jxl/base/common.h" |
15 | | #include "lib/jxl/base/rect.h" |
16 | | #include "lib/jxl/convolve-inl.h" |
17 | | |
18 | | HWY_BEFORE_NAMESPACE(); |
19 | | namespace jxl { |
20 | | namespace HWY_NAMESPACE { |
21 | | |
22 | | // These templates are not found via ADL. |
23 | | using hwy::HWY_NAMESPACE::Add; |
24 | | using hwy::HWY_NAMESPACE::Mul; |
25 | | using hwy::HWY_NAMESPACE::Vec; |
26 | | |
27 | | // Weighted sum of 1x5 pixels around ix, iy with [wx2 wx1 wx0 wx1 wx2]. |
// Scalar variant used where a tap may fall outside the image:
//  - the row is selected by passing iy and ysize through `wrap_y`;
//  - the four off-center column indices (ix-2, ix-1, ix+1, ix+2) are passed
//    through a WrapMirror functor together with xsize;
//  - the center tap reads row[ix] directly, without wrapping.
// wx0 weights the center sample, wx1 the (ix-1, ix+1) pair and wx2 the
// (ix-2, ix+2) pair. Returns the scalar sum of the three weighted terms.
28 | | template <class WrapY> |
29 | | static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y, |
30 | | const int64_t ix, const int64_t iy, |
31 | | const size_t xsize, const size_t ysize, |
32 | | const float wx0, const float wx1, |
33 | 0 | const float wx2) { |
34 | 0 | const WrapMirror wrap_x; |
35 | 0 | const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize)); |
36 | 0 | const float in_m2 = row[wrap_x(ix - 2, xsize)]; |
37 | 0 | const float in_p2 = row[wrap_x(ix + 2, xsize)]; |
38 | 0 | const float in_m1 = row[wrap_x(ix - 1, xsize)]; |
39 | 0 | const float in_p1 = row[wrap_x(ix + 1, xsize)]; |
40 | 0 | const float in_00 = row[ix]; |
// Samples that share a weight are added first, so each weight is multiplied
// only once.
41 | 0 | const float sum_2 = wx2 * (in_m2 + in_p2); |
42 | 0 | const float sum_1 = wx1 * (in_m1 + in_p1); |
43 | 0 | const float sum_0 = wx0 * in_00; |
44 | 0 | return sum_2 + (sum_1 + sum_0); |
45 | 0 | } Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SCALAR::WeightedSumBorder<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::WrapMirror, long, long, unsigned long, unsigned long, float, float, float) Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SCALAR::WeightedSumBorder<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::WrapUnchanged, long, long, unsigned long, unsigned long, float, float, float) |
46 | | |
// Vector counterpart of WeightedSumBorder: weighted sum of 1x5 pixels for a
// full vector of consecutive x positions starting at ix.
// The row is selected via `wrap_y(iy, ysize)`. No horizontal wrapping is
// applied: the five unaligned loads start at center-2 .. center+2, so the
// caller must ensure that this whole span is readable (this function does not
// check it).
// wx0/wx1/wx2 are vectors holding the center, +-1 and +-2 weights.
47 | | template <class WrapY, class V> |
48 | | static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix, |
49 | | const int64_t iy, const size_t ysize, const V wx0, |
50 | 0 | const V wx1, const V wx2) { |
51 | 0 | const HWY_FULL(float) d; |
52 | 0 | const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix; |
53 | 0 | const auto in_m2 = LoadU(d, center - 2); |
54 | 0 | const auto in_p2 = LoadU(d, center + 2); |
55 | 0 | const auto in_m1 = LoadU(d, center - 1); |
56 | 0 | const auto in_p1 = LoadU(d, center + 1); |
57 | 0 | const auto in_00 = LoadU(d, center); |
// Same grouping as the scalar version: symmetric taps are added before the
// multiplication by their shared weight.
58 | 0 | const auto sum_2 = Mul(wx2, Add(in_m2, in_p2)); |
59 | 0 | const auto sum_1 = Mul(wx1, Add(in_m1, in_p1)); |
60 | 0 | const auto sum_0 = Mul(wx0, in_00); |
61 | 0 | return Add(sum_2, Add(sum_1, sum_0)); |
62 | 0 | } Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SCALAR::Vec1<float> jxl::N_SCALAR::WeightedSum<jxl::WrapMirror, hwy::N_SCALAR::Vec1<float> >(jxl::Plane<float> const&, jxl::WrapMirror, unsigned long, long, unsigned long, hwy::N_SCALAR::Vec1<float>, hwy::N_SCALAR::Vec1<float>, hwy::N_SCALAR::Vec1<float>) Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SCALAR::Vec1<float> jxl::N_SCALAR::WeightedSum<jxl::WrapUnchanged, hwy::N_SCALAR::Vec1<float> >(jxl::Plane<float> const&, jxl::WrapUnchanged, unsigned long, long, unsigned long, hwy::N_SCALAR::Vec1<float>, hwy::N_SCALAR::Vec1<float>, hwy::N_SCALAR::Vec1<float>) |
63 | | |
64 | | // Produces result for one pixel |
// Scalar 5x5 symmetric convolution at (ix, iy). Each of the five kernel rows
// is evaluated with WeightedSumBorder, which wraps column indices against
// in.xsize(); row indices are wrapped by a default-constructed WrapY against
// in.ysize().
// Only element [0] of each weight array is read. Weights per kernel row:
//   row iy      : (w0, w1, w2)   center / +-1 / +-2
//   rows iy+-1  : (w1, w4, w5)
//   rows iy+-2  : (w2, w5, w8)
65 | | template <class WrapY> |
66 | | float Symmetric5Border(const ImageF& in, const int64_t ix, const int64_t iy, |
67 | 0 | const WeightsSymmetric5& weights) { |
68 | 0 | const float w0 = weights.c[0]; |
69 | 0 | const float w1 = weights.r[0]; |
70 | 0 | const float w2 = weights.R[0]; |
71 | 0 | const float w4 = weights.d[0]; |
72 | 0 | const float w5 = weights.L[0]; |
73 | 0 | const float w8 = weights.D[0]; |
74 | 

75 | 0 | const size_t xsize = in.xsize(); |
76 | 0 | const size_t ysize = in.ysize(); |
77 | 0 | const WrapY wrap_y; |
78 | | // Unrolled loop over all 5 rows of the kernel. |
// The rows are accumulated into two separate partial sums (sum0, sum1) that
// are only combined in the return statement.
79 | 0 | float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2); |
80 | 

81 | 0 | sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8); |
82 | 0 | float sum1 = |
83 | 0 | WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8); |
84 | 

85 | 0 | sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5); |
86 | 0 | sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5); |
87 | 

88 | 0 | return sum0 + sum1; |
89 | 0 | } Unexecuted instantiation: float jxl::N_SCALAR::Symmetric5Border<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&) Unexecuted instantiation: float jxl::N_SCALAR::Symmetric5Border<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&) |
90 | | |
91 | | // Produces result for one vector's worth of pixels |
// Vector 5x5 symmetric convolution for the pixels starting at input column
// `ix` of row `iy`; the result vector is written (unaligned) to
// row_out + rix. `ix` indexes the input image, `rix` indexes the output row.
// Rows are wrapped by a default-constructed WrapY against in.ysize(); columns
// are not wrapped (see WeightedSum), so the caller must keep ix-2 and the end
// of the ix+2 load inside the readable row.
// Row/weight pairing is the same as in Symmetric5Border:
//   row iy: (w0, w1, w2), rows iy+-1: (w1, w4, w5), rows iy+-2: (w2, w5, w8).
92 | | template <class WrapY> |
93 | | static void Symmetric5Interior(const ImageF& in, const int64_t ix, |
94 | | const int64_t rix, const int64_t iy, |
95 | | const WeightsSymmetric5& weights, |
96 | 0 | float* JXL_RESTRICT row_out) { |
97 | 0 | const HWY_FULL(float) d; |
98 | 

// Each weight array is loaded with LoadDup128 into a vector.
99 | 0 | const auto w0 = LoadDup128(d, weights.c); |
100 | 0 | const auto w1 = LoadDup128(d, weights.r); |
101 | 0 | const auto w2 = LoadDup128(d, weights.R); |
102 | 0 | const auto w4 = LoadDup128(d, weights.d); |
103 | 0 | const auto w5 = LoadDup128(d, weights.L); |
104 | 0 | const auto w8 = LoadDup128(d, weights.D); |
105 | 

106 | 0 | const size_t ysize = in.ysize(); |
107 | 0 | const WrapY wrap_y; |
108 | | // Unrolled loop over all 5 rows of the kernel. |
109 | 0 | auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2); |
110 | 

111 | 0 | sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 2, ysize, w2, w5, w8)); |
112 | 0 | auto sum1 = WeightedSum(in, wrap_y, ix, iy + 2, ysize, w2, w5, w8); |
113 | 

114 | 0 | sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5)); |
115 | 0 | sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5)); |
116 | 

117 | 0 | StoreU(Add(sum0, sum1), d, row_out + rix); |
118 | 0 | } Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SCALAR::Symmetric5Interior<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*) Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SCALAR::Symmetric5Interior<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*) |
119 | | |
// Convolves one row: input row `iy` of `in`, columns [rect.x0(), rect.x1()),
// written to row_out[0 .. rect.x1() - rect.x0()). `ix` is the input column,
// `rix` the matching output index; both advance in lockstep.
// The row is processed in three phases:
//   1. scalar (Symmetric5Border) while ix < min(RoundUpTo(kRadius, N), xend);
//      this loop does not run at all if rect.x0() is already past that bound;
//   2. vector (Symmetric5Interior), N pixels per step, while
//      ix + N + kRadius <= xend;
//   3. scalar (Symmetric5Border) for whatever remains up to xend.
// NOTE(review): std::min is used below, but <algorithm> is not among the
// includes visible in this listing - presumably it arrives transitively;
// confirm.
120 | | template <class WrapY> |
121 | | static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy, |
122 | | const WeightsSymmetric5& weights, |
123 | 0 | float* JXL_RESTRICT row_out) { |
124 | 0 | const int64_t kRadius = 2; |
125 | 0 | const size_t xend = rect.x1(); |
126 | 

127 | 0 | size_t rix = 0; |
128 | 0 | size_t ix = rect.x0(); |
129 | 0 | const HWY_FULL(float) d; |
130 | 0 | const size_t N = Lanes(d); |
// aligned_x is compared against ix, i.e. it is a column index in `in`, not an
// offset relative to rect.x0().
131 | 0 | const size_t aligned_x = RoundUpTo(kRadius, N); |
132 | 0 | for (; ix < std::min(aligned_x, xend); ++ix, ++rix) { |
133 | 0 | row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); |
134 | 0 | } |
// The vector loop stops kRadius columns before xend, so the load at
// center + 2 ends at or before column xend.
135 | 0 | for (; ix + N + kRadius <= xend; ix += N, rix += N) { |
136 | 0 | Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out); |
137 | 0 | } |
138 | 0 | for (; ix < xend; ++ix, ++rix) { |
139 | 0 | row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); |
140 | 0 | } |
141 | 0 | } Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SCALAR::Symmetric5Row<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*) Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SCALAR::Symmetric5Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*) |
142 | | |
143 | | // Semi-vectorized (interior pixels only); called directly like slow::, unlike |
144 | | // the fully vectorized strategies below. |
// Per-target implementation of the 5x5 symmetric convolution.
// Reads the `in_rect` region of `in` and writes the `out_rect` region of
// `out`; both rects must have equal xsize and ysize (checked with JXL_ENSURE).
// One task per row of in_rect is submitted through RunOnPool on `pool`.
// Returns true after RunOnPool succeeds; a RunOnPool failure is propagated
// via JXL_RETURN_IF_ERROR.
145 | | Status Symmetric5(const ImageF& in, const Rect& in_rect, |
146 | | const WeightsSymmetric5& weights, ThreadPool* pool, |
147 | 0 | ImageF* JXL_RESTRICT out, const Rect& out_rect) { |
148 | 0 | JXL_ENSURE(in_rect.xsize() == out_rect.xsize()); |
149 | 0 | JXL_ENSURE(in_rect.ysize() == out_rect.ysize()); |
150 | 0 | const size_t ysize = in_rect.ysize(); |
// `task` is the row index relative to the rects (riy); iy is the
// corresponding row in `in`.
151 | 0 | const auto process_row = [&](const uint32_t task, |
152 | 0 | size_t /*thread*/) -> Status { |
153 | 0 | const int64_t riy = task; |
154 | 0 | const int64_t iy = in_rect.y0() + riy; |
155 | 

// Rows within 2 of the top or bottom of `in` (not of in_rect) use WrapMirror
// for the vertical taps; all other rows use WrapUnchanged. The cast makes the
// subtraction signed, so it yields a negative value rather than wrapping
// around when in.ysize() < 2.
156 | 0 | if (iy < 2 || iy >= static_cast<ssize_t>(in.ysize()) - 2) { |
157 | 0 | Symmetric5Row<WrapMirror>(in, in_rect, iy, weights, |
158 | 0 | out_rect.Row(out, riy)); |
159 | 0 | } else { |
160 | 0 | Symmetric5Row<WrapUnchanged>(in, in_rect, iy, weights, |
161 | 0 | out_rect.Row(out, riy)); |
162 | 0 | } |
163 | 0 | return true; |
164 | 0 | }; |
165 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
166 | 0 | ThreadPool::NoInit, process_row, |
167 | 0 | "Symmetric5x5Convolution")); |
168 | 0 | return true; |
169 | 0 | } |
170 | | |
171 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
172 | | } // namespace HWY_NAMESPACE |
173 | | } // namespace jxl |
174 | | HWY_AFTER_NAMESPACE(); |
175 | | |
176 | | #if HWY_ONCE |
177 | | namespace jxl { |
178 | | |
// Public entry point (compiled once, under HWY_ONCE): forwards all arguments
// unchanged to the per-target Symmetric5 selected by HWY_DYNAMIC_DISPATCH and
// returns its Status.
179 | | HWY_EXPORT(Symmetric5); |
180 | | Status Symmetric5(const ImageF& in, const Rect& in_rect, |
181 | | const WeightsSymmetric5& weights, ThreadPool* pool, |
182 | 0 | ImageF* JXL_RESTRICT out, const Rect& out_rect) { |
183 | 0 | return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, in_rect, weights, pool, out, |
184 | 0 | out_rect); |
185 | 0 | } |
186 | | |
// Convenience overload without an explicit output rect: calls the
// six-argument Symmetric5 with Rect(*out) as the output rect. The size checks
// in the per-target implementation therefore require Rect(*out) to have the
// same xsize/ysize as `rect`.
187 | | Status Symmetric5(const ImageF& in, const Rect& rect, |
188 | | const WeightsSymmetric5& weights, ThreadPool* pool, |
189 | 0 | ImageF* JXL_RESTRICT out) { |
190 | 0 | return Symmetric5(in, rect, weights, pool, out, Rect(*out)); |
191 | 0 | } |
192 | | |
193 | | } // namespace jxl |
194 | | #endif // HWY_ONCE |