/src/libjxl/lib/jxl/convolve_slow.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include <cstddef> |
7 | | #include <cstdint> |
8 | | #include <cstdlib> |
9 | | |
10 | | #include "lib/jxl/base/compiler_specific.h" |
11 | | #include "lib/jxl/base/data_parallel.h" |
12 | | #include "lib/jxl/base/rect.h" |
13 | | #include "lib/jxl/base/status.h" |
14 | | #include "lib/jxl/convolve.h" |
15 | | #include "lib/jxl/image.h" |
16 | | #include "lib/jxl/image_ops.h" |
17 | | |
18 | | namespace jxl { |
19 | | |
20 | | //------------------------------------------------------------------------------ |
21 | | // Slow |
22 | | |
23 | | namespace { |
24 | | |
25 | | template <class WrapX, class WrapY> |
26 | | float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy, |
27 | | const int64_t xsize, const int64_t ysize, |
28 | 0 | const WeightsSymmetric3& weights) { |
29 | 0 | float sum = 0.0f; |
30 | | |
31 | | // ix: image; kx: kernel |
32 | 0 | for (int64_t ky = -1; ky <= 1; ky++) { |
33 | 0 | const int64_t y = WrapY()(iy + ky, ysize); |
34 | 0 | const float* JXL_RESTRICT row_in = in.ConstRow(static_cast<size_t>(y)); |
35 | |
|
36 | 0 | const float wc = ky == 0 ? weights.c[0] : weights.r[0]; |
37 | 0 | const float wlr = ky == 0 ? weights.r[0] : weights.d[0]; |
38 | |
|
39 | 0 | const int64_t xm1 = WrapX()(ix - 1, xsize); |
40 | 0 | const int64_t xp1 = WrapX()(ix + 1, xsize); |
41 | 0 | sum += row_in[ix] * wc + (row_in[xm1] + row_in[xp1]) * wlr; |
42 | 0 | } |
43 | 0 | return sum; |
44 | 0 | } Unexecuted instantiation: convolve_slow.cc:float jxl::(anonymous namespace)::SlowSymmetric3Pixel<jxl::WrapMirror, jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, long, jxl::WeightsSymmetric3 const&) Unexecuted instantiation: convolve_slow.cc:float jxl::(anonymous namespace)::SlowSymmetric3Pixel<jxl::WrapUnchanged, jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, long, jxl::WeightsSymmetric3 const&) Unexecuted instantiation: convolve_slow.cc:float jxl::(anonymous namespace)::SlowSymmetric3Pixel<jxl::WrapMirror, jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, long, jxl::WeightsSymmetric3 const&) Unexecuted instantiation: convolve_slow.cc:float jxl::(anonymous namespace)::SlowSymmetric3Pixel<jxl::WrapUnchanged, jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, long, jxl::WeightsSymmetric3 const&) |
45 | | |
46 | | template <class WrapY> |
47 | | void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize, |
48 | | const int64_t ysize, const WeightsSymmetric3& weights, |
49 | 0 | float* JXL_RESTRICT row_out) { |
50 | 0 | row_out[0] = |
51 | 0 | SlowSymmetric3Pixel<WrapMirror, WrapY>(in, 0, iy, xsize, ysize, weights); |
52 | 0 | for (int64_t ix = 1; ix < xsize - 1; ix++) { |
53 | 0 | row_out[ix] = SlowSymmetric3Pixel<WrapUnchanged, WrapY>(in, ix, iy, xsize, |
54 | 0 | ysize, weights); |
55 | 0 | } |
56 | 0 | { |
57 | 0 | const int64_t ix = xsize - 1; |
58 | 0 | row_out[ix] = SlowSymmetric3Pixel<WrapMirror, WrapY>(in, ix, iy, xsize, |
59 | 0 | ysize, weights); |
60 | 0 | } |
61 | 0 | } Unexecuted instantiation: convolve_slow.cc:void jxl::(anonymous namespace)::SlowSymmetric3Row<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric3 const&, float*) Unexecuted instantiation: convolve_slow.cc:void jxl::(anonymous namespace)::SlowSymmetric3Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric3 const&, float*) |
62 | | |
63 | | } // namespace |
64 | | |
65 | | Status SlowSymmetric3(const ImageF& in, const Rect& rect, |
66 | | const WeightsSymmetric3& weights, ThreadPool* pool, |
67 | 0 | ImageF* JXL_RESTRICT out) { |
68 | 0 | const int64_t xsize = static_cast<int64_t>(rect.xsize()); |
69 | 0 | const int64_t ysize = static_cast<int64_t>(rect.ysize()); |
70 | 0 | const int64_t kRadius = 1; |
71 | |
|
72 | 0 | const auto process_row = [&](const uint32_t task, |
73 | 0 | size_t /*thread*/) -> Status { |
74 | 0 | const int64_t iy = task; |
75 | 0 | float* JXL_RESTRICT out_row = out->Row(static_cast<size_t>(iy)); |
76 | |
|
77 | 0 | if (iy < kRadius || iy >= ysize - kRadius) { |
78 | 0 | SlowSymmetric3Row<WrapMirror>(in, iy, xsize, ysize, weights, out_row); |
79 | 0 | } else { |
80 | 0 | SlowSymmetric3Row<WrapUnchanged>(in, iy, xsize, ysize, weights, out_row); |
81 | 0 | } |
82 | 0 | return true; |
83 | 0 | }; |
84 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
85 | 0 | ThreadPool::NoInit, process_row, |
86 | 0 | "SlowSymmetric3")); |
87 | 0 | return true; |
88 | 0 | } |
89 | | |
90 | | namespace { |
91 | | |
92 | | // Separable kernels, any radius. |
93 | | StatusOr<float> SlowSeparablePixel(const ImageF& in, const Rect& rect, |
94 | | const int64_t x, const int64_t y, |
95 | | const int64_t radius, |
96 | | const float* JXL_RESTRICT horz_weights, |
97 | 0 | const float* JXL_RESTRICT vert_weights) { |
98 | 0 | const size_t xsize = in.xsize(); |
99 | 0 | const size_t ysize = in.ysize(); |
100 | 0 | const WrapMirror wrap; |
101 | |
|
102 | 0 | float mul = 0.0f; |
103 | 0 | for (int dy = -radius; dy <= radius; ++dy) { |
104 | 0 | const float wy = vert_weights[std::abs(dy) * 4]; |
105 | 0 | const size_t sy = wrap(rect.y0() + y + dy, ysize); |
106 | 0 | JXL_ENSURE(sy < ysize); |
107 | 0 | const float* const JXL_RESTRICT row = in.ConstRow(sy); |
108 | 0 | for (int dx = -radius; dx <= radius; ++dx) { |
109 | 0 | const float wx = horz_weights[std::abs(dx) * 4]; |
110 | 0 | const size_t sx = wrap(rect.x0() + x + dx, xsize); |
111 | 0 | JXL_ENSURE(sx < xsize); |
112 | 0 | mul += row[sx] * wx * wy; |
113 | 0 | } |
114 | 0 | } |
115 | 0 | return mul; |
116 | 0 | } |
117 | | |
118 | | template <int R, typename Weights> |
119 | | Status SlowSeparable(const ImageF& in, const Rect& in_rect, |
120 | | const Weights& weights, ThreadPool* pool, ImageF* out, |
121 | 0 | const Rect& out_rect) { |
122 | 0 | JXL_ENSURE(in_rect.xsize() == out_rect.xsize()); |
123 | 0 | JXL_ENSURE(in_rect.ysize() == out_rect.ysize()); |
124 | 0 | JXL_ENSURE(in_rect.IsInside(Rect(in))); |
125 | 0 | JXL_ENSURE(out_rect.IsInside(Rect(*out))); |
126 | 0 | const float* horz_weights = &weights.horz[0]; |
127 | 0 | const float* vert_weights = &weights.vert[0]; |
128 | |
|
129 | 0 | const auto process_row = [&](const uint32_t task, |
130 | 0 | size_t /*thread*/) -> Status { |
131 | 0 | const int64_t y = task; |
132 | |
|
133 | 0 | float* const JXL_RESTRICT row_out = out_rect.Row(out, y); |
134 | 0 | for (size_t x = 0; x < in_rect.xsize(); ++x) { |
135 | 0 | JXL_ASSIGN_OR_RETURN(row_out[x], |
136 | 0 | SlowSeparablePixel(in, in_rect, x, y, /*radius=*/R, |
137 | 0 | horz_weights, vert_weights)); |
138 | 0 | } |
139 | 0 | return true; |
140 | 0 | }; |
141 | 0 | const size_t ysize = in_rect.ysize(); |
142 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
143 | 0 | ThreadPool::NoInit, process_row, |
144 | 0 | "SlowSeparable")); |
145 | 0 | return true; |
146 | 0 | } |
147 | | |
148 | | } // namespace |
149 | | |
150 | | Status SlowSeparable5(const ImageF& in, const Rect& in_rect, |
151 | | const WeightsSeparable5& weights, ThreadPool* pool, |
152 | 0 | ImageF* out, const Rect& out_rect) { |
153 | 0 | return SlowSeparable<2>(in, in_rect, weights, pool, out, out_rect); |
154 | 0 | } |
155 | | |
156 | | } // namespace jxl |