/src/libjxl/lib/jxl/convolve-inl.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #if defined(LIB_JXL_CONVOLVE_INL_H_) == defined(HWY_TARGET_TOGGLE) |
7 | | #ifdef LIB_JXL_CONVOLVE_INL_H_ |
8 | | #undef LIB_JXL_CONVOLVE_INL_H_ |
9 | | #else |
10 | | #define LIB_JXL_CONVOLVE_INL_H_ |
11 | | #endif |
12 | | |
13 | | #include <hwy/highway.h> |
14 | | |
15 | | #include "lib/jxl/base/data_parallel.h" |
16 | | #include "lib/jxl/base/rect.h" |
17 | | #include "lib/jxl/base/status.h" |
18 | | #include "lib/jxl/image_ops.h" |
19 | | |
20 | | HWY_BEFORE_NAMESPACE(); |
21 | | namespace jxl { |
22 | | namespace HWY_NAMESPACE { |
23 | | namespace { |
24 | | |
25 | | // These templates are not found via ADL. |
26 | | using hwy::HWY_NAMESPACE::Broadcast; |
27 | | #if HWY_TARGET != HWY_SCALAR |
28 | | using hwy::HWY_NAMESPACE::CombineShiftRightBytes; |
29 | | #endif |
30 | | using hwy::HWY_NAMESPACE::TableLookupLanes; |
31 | | using hwy::HWY_NAMESPACE::Vec; |
32 | | |
33 | | // Synthesizes left/right neighbors from a vector of center pixels. |
34 | | class Neighbors { |
35 | | public: |
36 | | using D = HWY_CAPPED(float, 16); |
37 | | using V = Vec<D>; |
38 | | |
39 | | // Returns l[i] == c[Mirror(i - 1)]. |
40 | 0 | HWY_INLINE HWY_MAYBE_UNUSED static V FirstL1(const V c) { |
41 | 0 | #if HWY_CAP_GE256 |
42 | 0 | const D d; |
43 | 0 | HWY_ALIGN constexpr int32_t lanes[16] = {0, 0, 1, 2, 3, 4, 5, 6, |
44 | 0 | 7, 8, 9, 10, 11, 12, 13, 14}; |
45 | 0 | const auto indices = SetTableIndices(d, lanes); |
46 | 0 | // c = PONM'LKJI |
47 | 0 | return TableLookupLanes(c, indices); // ONML'KJII |
48 | 0 | #elif HWY_TARGET == HWY_SCALAR |
49 | 0 | return c; // Same (the first mirrored value is the last valid one) |
50 | 0 | #else // 128 bit |
51 | 0 | // c = LKJI |
52 | 0 | #if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86) |
53 | 0 | return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(2, 1, 0, 0))}; // KJII |
54 | 0 | #else |
55 | 0 | const D d; |
56 | 0 | // TODO(deymo): Figure out if this can be optimized using a single vsri |
57 | 0 | // instruction to convert LKJI to KJII. |
58 | 0 | HWY_ALIGN constexpr int lanes[4] = {0, 0, 1, 2}; // KJII |
59 | 0 | const auto indices = SetTableIndices(d, lanes); |
60 | 0 | return TableLookupLanes(c, indices); |
61 | 0 | #endif |
62 | 0 | #endif |
63 | 0 | } Unexecuted instantiation: convolve_separable5.cc:jxl::N_SCALAR::(anonymous namespace)::Neighbors::FirstL1(hwy::N_SCALAR::Vec1<float>) Unexecuted instantiation: convolve_slow.cc:jxl::N_SCALAR::(anonymous namespace)::Neighbors::FirstL1(hwy::N_SCALAR::Vec1<float>) Unexecuted instantiation: convolve_symmetric5.cc:jxl::N_SCALAR::(anonymous namespace)::Neighbors::FirstL1(hwy::N_SCALAR::Vec1<float>) |
64 | | |
65 | | // Returns l[i] == c[Mirror(i - 2)]. |
66 | 0 | HWY_INLINE HWY_MAYBE_UNUSED static V FirstL2(const V c) { |
67 | 0 | #if HWY_CAP_GE256 |
68 | 0 | const D d; |
69 | 0 | HWY_ALIGN constexpr int32_t lanes[16] = {1, 0, 0, 1, 2, 3, 4, 5, |
70 | 0 | 6, 7, 8, 9, 10, 11, 12, 13}; |
71 | 0 | const auto indices = SetTableIndices(d, lanes); |
72 | 0 | // c = PONM'LKJI |
73 | 0 | return TableLookupLanes(c, indices); // NMLK'JIIJ |
74 | 0 | #elif HWY_TARGET == HWY_SCALAR |
75 | 0 | const D d; |
76 | 0 | JXL_DEBUG_ABORT("Unsupported"); |
77 | 0 | return Zero(d); |
78 | 0 | #else // 128 bit |
79 | 0 | // c = LKJI |
80 | 0 | #if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86) |
81 | 0 | return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(1, 0, 0, 1))}; // JIIJ |
82 | 0 | #else |
83 | 0 | const D d; |
84 | 0 | HWY_ALIGN constexpr int lanes[4] = {1, 0, 0, 1}; // JIIJ |
85 | 0 | const auto indices = SetTableIndices(d, lanes); |
86 | 0 | return TableLookupLanes(c, indices); |
87 | 0 | #endif |
88 | 0 | #endif |
89 | 0 | } Unexecuted instantiation: convolve_separable5.cc:jxl::N_SCALAR::(anonymous namespace)::Neighbors::FirstL2(hwy::N_SCALAR::Vec1<float>) Unexecuted instantiation: convolve_slow.cc:jxl::N_SCALAR::(anonymous namespace)::Neighbors::FirstL2(hwy::N_SCALAR::Vec1<float>) Unexecuted instantiation: convolve_symmetric5.cc:jxl::N_SCALAR::(anonymous namespace)::Neighbors::FirstL2(hwy::N_SCALAR::Vec1<float>) |
90 | | |
91 | | // Returns l[i] == c[Mirror(i - 3)]. |
92 | 0 | HWY_INLINE HWY_MAYBE_UNUSED static V FirstL3(const V c) { |
93 | 0 | #if HWY_CAP_GE256 |
94 | 0 | const D d; |
95 | 0 | HWY_ALIGN constexpr int32_t lanes[16] = {2, 1, 0, 0, 1, 2, 3, 4, |
96 | 0 | 5, 6, 7, 8, 9, 10, 11, 12}; |
97 | 0 | const auto indices = SetTableIndices(d, lanes); |
98 | 0 | // c = PONM'LKJI |
99 | 0 | return TableLookupLanes(c, indices); // MLKJ'IIJK |
100 | 0 | #elif HWY_TARGET == HWY_SCALAR |
101 | 0 | const D d; |
102 | 0 | JXL_DEBUG_ABORT("Unsipported"); |
103 | 0 | return Zero(d); |
104 | 0 | #else // 128 bit |
105 | 0 | // c = LKJI |
106 | 0 | #if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86) |
107 | 0 | return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(0, 0, 1, 2))}; // IIJK |
108 | 0 | #else |
109 | 0 | const D d; |
110 | 0 | HWY_ALIGN constexpr int lanes[4] = {2, 1, 0, 0}; // IIJK |
111 | 0 | const auto indices = SetTableIndices(d, lanes); |
112 | 0 | return TableLookupLanes(c, indices); |
113 | 0 | #endif |
114 | 0 | #endif |
115 | 0 | } Unexecuted instantiation: convolve_separable5.cc:jxl::N_SCALAR::(anonymous namespace)::Neighbors::FirstL3(hwy::N_SCALAR::Vec1<float>) Unexecuted instantiation: convolve_slow.cc:jxl::N_SCALAR::(anonymous namespace)::Neighbors::FirstL3(hwy::N_SCALAR::Vec1<float>) Unexecuted instantiation: convolve_symmetric5.cc:jxl::N_SCALAR::(anonymous namespace)::Neighbors::FirstL3(hwy::N_SCALAR::Vec1<float>) |
116 | | }; |
117 | | |
118 | | #if HWY_TARGET != HWY_SCALAR |
119 | | |
120 | | // Returns indices for SetTableIndices such that TableLookupLanes on the |
121 | | // rightmost unaligned vector (rightmost sample in its most-significant lane) |
122 | | // returns the mirrored values, with the mirror outside the last valid sample. |
123 | | inline const int32_t* MirrorLanes(const size_t mod) { |
124 | | const HWY_CAPPED(float, 16) d; |
125 | | constexpr size_t kN = MaxLanes(d); |
126 | | // typo:off |
127 | | // For mod = `image width mod 16` 0..15: |
128 | | // last full vec mirrored (mem order) loadedVec mirrorVec idxVec |
129 | | // 0123456789abcdef| fedcba9876543210 fed..210 012..def 012..def |
130 | | // 0123456789abcdef|0 0fedcba98765432 0fe..321 234..f00 123..eff |
131 | | // 0123456789abcdef|01 10fedcba987654 10f..432 456..110 234..ffe |
132 | | // 0123456789abcdef|012 210fedcba9876 210..543 67..2210 34..ffed |
133 | | // 0123456789abcdef|0123 3210fedcba98 321..654 8..33210 4..ffedc |
134 | | // 0123456789abcdef|01234 43210fedcba |
135 | | // 0123456789abcdef|012345 543210fedc |
136 | | // 0123456789abcdef|0123456 6543210fe |
137 | | // 0123456789abcdef|01234567 76543210 |
138 | | // 0123456789abcdef|012345678 8765432 |
139 | | // 0123456789abcdef|0123456789 987654 |
140 | | // 0123456789abcdef|0123456789A A9876 |
141 | | // 0123456789abcdef|0123456789AB BA98 |
142 | | // 0123456789abcdef|0123456789ABC CBA |
143 | | // 0123456789abcdef|0123456789ABCD DC |
144 | | // 0123456789abcdef|0123456789ABCDE E EDC..10f EED..210 ffe..321 |
145 | | // typo:on |
146 | | #if HWY_CAP_GE512 |
147 | | HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = { |
148 | | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, // |
149 | | 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; |
150 | | #elif HWY_CAP_GE256 |
151 | | HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = { |
152 | | 1, 2, 3, 4, 5, 6, 7, 7, // |
153 | | 6, 5, 4, 3, 2, 1, 0}; |
154 | | #else // 128-bit |
155 | | HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {1, 2, 3, 3, // |
156 | | 2, 1, 0}; |
157 | | #endif |
158 | | return idx_lanes + kN - 1 - mod; |
159 | | } |
160 | | |
161 | | #endif // HWY_TARGET != HWY_SCALAR |
162 | | |
163 | | // Single entry point for convolution. |
164 | | // "Strategy" (Direct*/Separable*) decides kernel size and how to evaluate it. |
165 | | template <class Strategy> |
166 | | class ConvolveT { |
167 | | static constexpr int64_t kRadius = Strategy::kRadius; |
168 | | using Simd = HWY_CAPPED(float, 16); |
169 | | |
170 | | public: |
171 | 0 | static size_t MinWidth() { |
172 | 0 | #if HWY_TARGET == HWY_SCALAR |
173 | | // First/Last use mirrored loads of up to +/- kRadius. |
174 | 0 | return 2 * kRadius; |
175 | | #else |
176 | | return Lanes(Simd()) + kRadius; |
177 | | #endif |
178 | 0 | } |
179 | | |
180 | | // "Image" is ImageF or Image3F. |
181 | | template <class Image, class Weights> |
182 | | static void Run(const Image& in, const Rect& rect, const Weights& weights, |
183 | 0 | ThreadPool* pool, Image* out) { |
184 | 0 | JXL_DASSERT(SameSize(rect, *out)); |
185 | 0 | JXL_DASSERT(rect.xsize() >= MinWidth()); |
186 | |
|
187 | 0 | static_assert(static_cast<int64_t>(kRadius) <= 3, |
188 | 0 | "Must handle [0, kRadius) and >= kRadius"); |
189 | 0 | switch (rect.xsize() % Lanes(Simd())) { |
190 | 0 | case 0: |
191 | 0 | return RunRows<0>(in, rect, weights, pool, out); |
192 | 0 | case 1: |
193 | 0 | return RunRows<1>(in, rect, weights, pool, out); |
194 | 0 | case 2: |
195 | 0 | return RunRows<2>(in, rect, weights, pool, out); |
196 | 0 | default: |
197 | 0 | return RunRows<3>(in, rect, weights, pool, out); |
198 | 0 | } |
199 | 0 | } |
200 | | |
201 | | private: |
202 | | template <size_t kSizeModN, class WrapRow, class Weights> |
203 | | static JXL_INLINE void RunRow(const float* JXL_RESTRICT in, |
204 | | const size_t xsize, const int64_t stride, |
205 | | const WrapRow& wrap_row, const Weights& weights, |
206 | 0 | float* JXL_RESTRICT out) { |
207 | 0 | Strategy::template ConvolveRow<kSizeModN>(in, xsize, stride, wrap_row, |
208 | 0 | weights, out); |
209 | 0 | } Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRow<0ul, jxl::WrapRowMirror, jxl::WeightsSeparable5>(float const*, unsigned long, long, jxl::WrapRowMirror const&, jxl::WeightsSeparable5 const&, float*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRow<0ul, jxl::WrapRowUnchanged, jxl::WeightsSeparable5>(float const*, unsigned long, long, jxl::WrapRowUnchanged const&, jxl::WeightsSeparable5 const&, float*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRow<1ul, jxl::WrapRowMirror, jxl::WeightsSeparable5>(float const*, unsigned long, long, jxl::WrapRowMirror const&, jxl::WeightsSeparable5 const&, float*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRow<1ul, jxl::WrapRowUnchanged, jxl::WeightsSeparable5>(float const*, unsigned long, long, jxl::WrapRowUnchanged const&, jxl::WeightsSeparable5 const&, float*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRow<2ul, jxl::WrapRowMirror, jxl::WeightsSeparable5>(float const*, unsigned long, long, jxl::WrapRowMirror const&, jxl::WeightsSeparable5 const&, float*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRow<2ul, jxl::WrapRowUnchanged, jxl::WeightsSeparable5>(float const*, unsigned long, long, jxl::WrapRowUnchanged const&, jxl::WeightsSeparable5 const&, float*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRow<3ul, jxl::WrapRowMirror, jxl::WeightsSeparable5>(float const*, unsigned long, long, jxl::WrapRowMirror const&, jxl::WeightsSeparable5 const&, float*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRow<3ul, jxl::WrapRowUnchanged, jxl::WeightsSeparable5>(float const*, unsigned long, long, jxl::WrapRowUnchanged const&, jxl::WeightsSeparable5 const&, float*) |
210 | | |
211 | | template <size_t kSizeModN, class Weights> |
212 | | static JXL_INLINE void RunBorderRows(const ImageF& in, const Rect& rect, |
213 | | const int64_t ybegin, const int64_t yend, |
214 | 0 | const Weights& weights, ImageF* out) { |
215 | 0 | const int64_t stride = in.PixelsPerRow(); |
216 | 0 | const WrapRowMirror wrap_row(in, rect.ysize()); |
217 | 0 | for (int64_t y = ybegin; y < yend; ++y) { |
218 | 0 | RunRow<kSizeModN>(rect.ConstRow(in, y), rect.xsize(), stride, wrap_row, |
219 | 0 | weights, out->Row(y)); |
220 | 0 | } |
221 | 0 | } Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunBorderRows<0ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::Plane<float>*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunBorderRows<1ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::Plane<float>*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunBorderRows<2ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::Plane<float>*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunBorderRows<3ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::Plane<float>*) |
222 | | |
223 | | // Image3F. |
224 | | template <size_t kSizeModN, class Weights> |
225 | | static JXL_INLINE void RunBorderRows(const Image3F& in, const Rect& rect, |
226 | | const int64_t ybegin, const int64_t yend, |
227 | | const Weights& weights, Image3F* out) { |
228 | | const int64_t stride = in.PixelsPerRow(); |
229 | | for (int64_t y = ybegin; y < yend; ++y) { |
230 | | for (size_t c = 0; c < 3; ++c) { |
231 | | const WrapRowMirror wrap_row(in.Plane(c), rect.ysize()); |
232 | | RunRow<kSizeModN>(rect.ConstPlaneRow(in, c, y), rect.xsize(), stride, |
233 | | wrap_row, weights, out->PlaneRow(c, y)); |
234 | | } |
235 | | } |
236 | | } |
237 | | |
238 | | template <size_t kSizeModN, class Weights> |
239 | | static JXL_INLINE void RunInteriorRows(const ImageF& in, const Rect& rect, |
240 | | const int64_t ybegin, |
241 | | const int64_t yend, |
242 | | const Weights& weights, |
243 | 0 | ThreadPool* pool, ImageF* out) { |
244 | 0 | const int64_t stride = in.PixelsPerRow(); |
245 | 0 | const auto process_row = [&](const uint32_t y, size_t /*thread*/) HWY_ATTR { |
246 | 0 | RunRow<kSizeModN>(rect.ConstRow(in, y), rect.xsize(), stride, |
247 | 0 | WrapRowUnchanged(), weights, out->Row(y)); |
248 | 0 | return true; |
249 | 0 | }; Unexecuted instantiation: convolve_separable5.cc:jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunInteriorRows<0ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*)::{lambda(unsigned int, unsigned long)#1}::operator()(unsigned int, unsigned long) const Unexecuted instantiation: convolve_separable5.cc:jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunInteriorRows<1ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*)::{lambda(unsigned int, unsigned long)#1}::operator()(unsigned int, unsigned long) const Unexecuted instantiation: convolve_separable5.cc:jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunInteriorRows<2ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*)::{lambda(unsigned int, unsigned long)#1}::operator()(unsigned int, unsigned long) const Unexecuted instantiation: convolve_separable5.cc:jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunInteriorRows<3ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*)::{lambda(unsigned int, unsigned long)#1}::operator()(unsigned int, unsigned long) const |
250 | 0 | Status status = RunOnPool(pool, ybegin, yend, ThreadPool::NoInit, |
251 | 0 | process_row, "Convolve"); |
252 | 0 | (void)status; |
253 | 0 | JXL_DASSERT(status); |
254 | 0 | } Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunInteriorRows<0ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunInteriorRows<1ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunInteriorRows<2ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunInteriorRows<3ul, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, long, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*) |
255 | | |
256 | | // Image3F. |
257 | | template <size_t kSizeModN, class Weights> |
258 | | static JXL_INLINE void RunInteriorRows(const Image3F& in, const Rect& rect, |
259 | | const int64_t ybegin, |
260 | | const int64_t yend, |
261 | | const Weights& weights, |
262 | | ThreadPool* pool, Image3F* out) { |
263 | | const int64_t stride = in.PixelsPerRow(); |
264 | | const auto process_row = [&](const uint32_t y, size_t /*thread*/) HWY_ATTR { |
265 | | for (size_t c = 0; c < 3; ++c) { |
266 | | RunRow<kSizeModN>(rect.ConstPlaneRow(in, c, y), rect.xsize(), stride, |
267 | | WrapRowUnchanged(), weights, out->PlaneRow(c, y)); |
268 | | } |
269 | | return true; |
270 | | }; |
271 | | Status status = RunOnPool(pool, ybegin, yend, ThreadPool::NoInit, |
272 | | process_row, "Convolve3"); |
273 | | (void)status; |
274 | | JXL_DASSERT(status); |
275 | | } |
276 | | |
277 | | template <size_t kSizeModN, class Image, class Weights> |
278 | | static JXL_INLINE void RunRows(const Image& in, const Rect& rect, |
279 | | const Weights& weights, ThreadPool* pool, |
280 | 0 | Image* out) { |
281 | 0 | const int64_t ysize = rect.ysize(); |
282 | 0 | RunBorderRows<kSizeModN>(in, rect, 0, |
283 | 0 | std::min(static_cast<int64_t>(kRadius), ysize), |
284 | 0 | weights, out); |
285 | 0 | if (ysize > 2 * static_cast<int64_t>(kRadius)) { |
286 | 0 | RunInteriorRows<kSizeModN>(in, rect, static_cast<int64_t>(kRadius), |
287 | 0 | ysize - static_cast<int64_t>(kRadius), weights, |
288 | 0 | pool, out); |
289 | 0 | } |
290 | 0 | if (ysize > static_cast<int64_t>(kRadius)) { |
291 | 0 | RunBorderRows<kSizeModN>(in, rect, ysize - static_cast<int64_t>(kRadius), |
292 | 0 | ysize, weights, out); |
293 | 0 | } |
294 | 0 | } Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRows<0ul, jxl::Plane<float>, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRows<1ul, jxl::Plane<float>, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRows<2ul, jxl::Plane<float>, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*) Unexecuted instantiation: convolve_separable5.cc:void jxl::N_SCALAR::(anonymous namespace)::ConvolveT<jxl::N_SCALAR::Separable5Strategy>::RunRows<3ul, jxl::Plane<float>, jxl::WeightsSeparable5>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSeparable5 const&, jxl::ThreadPool*, jxl::Plane<float>*) |
295 | | }; |
296 | | |
297 | | } // namespace |
298 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
299 | | } // namespace HWY_NAMESPACE |
300 | | } // namespace jxl |
301 | | HWY_AFTER_NAMESPACE(); |
302 | | |
303 | | #endif // LIB_JXL_CONVOLVE_INL_H_ |