/src/libjxl/lib/jxl/convolve_symmetric5.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/convolve.h" |
7 | | |
8 | | #undef HWY_TARGET_INCLUDE |
9 | | #define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric5.cc" |
10 | | #include <hwy/foreach_target.h> |
11 | | #include <hwy/highway.h> |
12 | | |
13 | | #include "lib/jxl/base/common.h" |
14 | | #include "lib/jxl/base/rect.h" |
15 | | #include "lib/jxl/convolve-inl.h" |
16 | | |
17 | | HWY_BEFORE_NAMESPACE(); |
18 | | namespace jxl { |
19 | | namespace HWY_NAMESPACE { |
20 | | |
21 | | // These templates are not found via ADL. |
22 | | using hwy::HWY_NAMESPACE::Add; |
23 | | using hwy::HWY_NAMESPACE::Mul; |
24 | | using hwy::HWY_NAMESPACE::Vec; |
25 | | |
26 | | // Weighted sum of 1x5 pixels around ix, iy with [wx2 wx1 wx0 wx1 wx2]. |
27 | | template <class WrapY> |
28 | | static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y, |
29 | | const int64_t ix, const int64_t iy, |
30 | | const size_t xsize, const size_t ysize, |
31 | | const float wx0, const float wx1, |
32 | 60.0k | const float wx2) { |
33 | 60.0k | const WrapMirror wrap_x; |
34 | 60.0k | const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize)); |
35 | 60.0k | const float in_m2 = row[wrap_x(ix - 2, xsize)]; |
36 | 60.0k | const float in_p2 = row[wrap_x(ix + 2, xsize)]; |
37 | 60.0k | const float in_m1 = row[wrap_x(ix - 1, xsize)]; |
38 | 60.0k | const float in_p1 = row[wrap_x(ix + 1, xsize)]; |
39 | 60.0k | const float in_00 = row[ix]; |
40 | 60.0k | const float sum_2 = wx2 * (in_m2 + in_p2); |
41 | 60.0k | const float sum_1 = wx1 * (in_m1 + in_p1); |
42 | 60.0k | const float sum_0 = wx0 * in_00; |
43 | 60.0k | return sum_2 + (sum_1 + sum_0); |
44 | 60.0k | } Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SSE4::WeightedSumBorder<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::WrapMirror, long, long, unsigned long, unsigned long, float, float, float) Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SSE4::WeightedSumBorder<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::WrapUnchanged, long, long, unsigned long, unsigned long, float, float, float) convolve_symmetric5.cc:float jxl::N_AVX2::WeightedSumBorder<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::WrapMirror, long, long, unsigned long, unsigned long, float, float, float) Line | Count | Source | 32 | 30.3k | const float wx2) { | 33 | 30.3k | const WrapMirror wrap_x; | 34 | 30.3k | const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize)); | 35 | 30.3k | const float in_m2 = row[wrap_x(ix - 2, xsize)]; | 36 | 30.3k | const float in_p2 = row[wrap_x(ix + 2, xsize)]; | 37 | 30.3k | const float in_m1 = row[wrap_x(ix - 1, xsize)]; | 38 | 30.3k | const float in_p1 = row[wrap_x(ix + 1, xsize)]; | 39 | 30.3k | const float in_00 = row[ix]; | 40 | 30.3k | const float sum_2 = wx2 * (in_m2 + in_p2); | 41 | 30.3k | const float sum_1 = wx1 * (in_m1 + in_p1); | 42 | 30.3k | const float sum_0 = wx0 * in_00; | 43 | 30.3k | return sum_2 + (sum_1 + sum_0); | 44 | 30.3k | } |
convolve_symmetric5.cc:float jxl::N_AVX2::WeightedSumBorder<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::WrapUnchanged, long, long, unsigned long, unsigned long, float, float, float) Line | Count | Source | 32 | 29.6k | const float wx2) { | 33 | 29.6k | const WrapMirror wrap_x; | 34 | 29.6k | const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize)); | 35 | 29.6k | const float in_m2 = row[wrap_x(ix - 2, xsize)]; | 36 | 29.6k | const float in_p2 = row[wrap_x(ix + 2, xsize)]; | 37 | 29.6k | const float in_m1 = row[wrap_x(ix - 1, xsize)]; | 38 | 29.6k | const float in_p1 = row[wrap_x(ix + 1, xsize)]; | 39 | 29.6k | const float in_00 = row[ix]; | 40 | 29.6k | const float sum_2 = wx2 * (in_m2 + in_p2); | 41 | 29.6k | const float sum_1 = wx1 * (in_m1 + in_p1); | 42 | 29.6k | const float sum_0 = wx0 * in_00; | 43 | 29.6k | return sum_2 + (sum_1 + sum_0); | 44 | 29.6k | } |
Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SSE2::WeightedSumBorder<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::WrapMirror, long, long, unsigned long, unsigned long, float, float, float) Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SSE2::WeightedSumBorder<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::WrapUnchanged, long, long, unsigned long, unsigned long, float, float, float) |
45 | | |
46 | | template <class WrapY, class V> |
47 | | static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix, |
48 | | const int64_t iy, const size_t ysize, const V wx0, |
49 | 0 | const V wx1, const V wx2) { |
50 | 0 | const HWY_FULL(float) d; |
51 | 0 | const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix; |
52 | 0 | const auto in_m2 = LoadU(d, center - 2); |
53 | 0 | const auto in_p2 = LoadU(d, center + 2); |
54 | 0 | const auto in_m1 = LoadU(d, center - 1); |
55 | 0 | const auto in_p1 = LoadU(d, center + 1); |
56 | 0 | const auto in_00 = LoadU(d, center); |
57 | 0 | const auto sum_2 = Mul(wx2, Add(in_m2, in_p2)); |
58 | 0 | const auto sum_1 = Mul(wx1, Add(in_m1, in_p1)); |
59 | 0 | const auto sum_0 = Mul(wx0, in_00); |
60 | 0 | return Add(sum_2, Add(sum_1, sum_0)); |
61 | 0 | } Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::WeightedSum<jxl::WrapMirror, hwy::N_SSE4::Vec128<float, 4ul> >(jxl::Plane<float> const&, jxl::WrapMirror, unsigned long, long, unsigned long, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::WeightedSum<jxl::WrapUnchanged, hwy::N_SSE4::Vec128<float, 4ul> >(jxl::Plane<float> const&, jxl::WrapUnchanged, unsigned long, long, unsigned long, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::WeightedSum<jxl::WrapMirror, hwy::N_AVX2::Vec256<float> >(jxl::Plane<float> const&, jxl::WrapMirror, unsigned long, long, unsigned long, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::WeightedSum<jxl::WrapUnchanged, hwy::N_AVX2::Vec256<float> >(jxl::Plane<float> const&, jxl::WrapUnchanged, unsigned long, long, unsigned long, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::WeightedSum<jxl::WrapMirror, hwy::N_SSE2::Vec128<float, 4ul> >(jxl::Plane<float> const&, jxl::WrapMirror, unsigned long, long, unsigned long, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::WeightedSum<jxl::WrapUnchanged, hwy::N_SSE2::Vec128<float, 4ul> >(jxl::Plane<float> const&, jxl::WrapUnchanged, unsigned long, long, unsigned long, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) |
62 | | |
63 | | // Produces result for one pixel |
64 | | template <class WrapY> |
65 | | float Symmetric5Border(const ImageF& in, const int64_t ix, const int64_t iy, |
66 | 12.8k | const WeightsSymmetric5& weights) { |
67 | 12.8k | const float w0 = weights.c[0]; |
68 | 12.8k | const float w1 = weights.r[0]; |
69 | 12.8k | const float w2 = weights.R[0]; |
70 | 12.8k | const float w4 = weights.d[0]; |
71 | 12.8k | const float w5 = weights.L[0]; |
72 | 12.8k | const float w8 = weights.D[0]; |
73 | | |
74 | 12.8k | const size_t xsize = in.xsize(); |
75 | 12.8k | const size_t ysize = in.ysize(); |
76 | 12.8k | const WrapY wrap_y; |
77 | | // Unrolled loop over all 5 rows of the kernel. |
78 | 12.8k | float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2); |
79 | | |
80 | 12.8k | sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8); |
81 | 12.8k | float sum1 = |
82 | 12.8k | WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8); |
83 | | |
84 | 12.8k | sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5); |
85 | 12.8k | sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5); |
86 | | |
87 | 12.8k | return sum0 + sum1; |
88 | 12.8k | } Unexecuted instantiation: float jxl::N_SSE4::Symmetric5Border<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&) Unexecuted instantiation: float jxl::N_SSE4::Symmetric5Border<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&) float jxl::N_AVX2::Symmetric5Border<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&) Line | Count | Source | 66 | 6.45k | const WeightsSymmetric5& weights) { | 67 | 6.45k | const float w0 = weights.c[0]; | 68 | 6.45k | const float w1 = weights.r[0]; | 69 | 6.45k | const float w2 = weights.R[0]; | 70 | 6.45k | const float w4 = weights.d[0]; | 71 | 6.45k | const float w5 = weights.L[0]; | 72 | 6.45k | const float w8 = weights.D[0]; | 73 | | | 74 | 6.45k | const size_t xsize = in.xsize(); | 75 | 6.45k | const size_t ysize = in.ysize(); | 76 | 6.45k | const WrapY wrap_y; | 77 | | // Unrolled loop over all 5 rows of the kernel. | 78 | 6.45k | float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2); | 79 | | | 80 | 6.45k | sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8); | 81 | 6.45k | float sum1 = | 82 | 6.45k | WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8); | 83 | | | 84 | 6.45k | sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5); | 85 | 6.45k | sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5); | 86 | | | 87 | 6.45k | return sum0 + sum1; | 88 | 6.45k | } |
float jxl::N_AVX2::Symmetric5Border<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&) Line | Count | Source | 66 | 6.36k | const WeightsSymmetric5& weights) { | 67 | 6.36k | const float w0 = weights.c[0]; | 68 | 6.36k | const float w1 = weights.r[0]; | 69 | 6.36k | const float w2 = weights.R[0]; | 70 | 6.36k | const float w4 = weights.d[0]; | 71 | 6.36k | const float w5 = weights.L[0]; | 72 | 6.36k | const float w8 = weights.D[0]; | 73 | | | 74 | 6.36k | const size_t xsize = in.xsize(); | 75 | 6.36k | const size_t ysize = in.ysize(); | 76 | 6.36k | const WrapY wrap_y; | 77 | | // Unrolled loop over all 5 rows of the kernel. | 78 | 6.36k | float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2); | 79 | | | 80 | 6.36k | sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8); | 81 | 6.36k | float sum1 = | 82 | 6.36k | WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8); | 83 | | | 84 | 6.36k | sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5); | 85 | 6.36k | sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5); | 86 | | | 87 | 6.36k | return sum0 + sum1; | 88 | 6.36k | } |
Unexecuted instantiation: float jxl::N_SSE2::Symmetric5Border<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&) Unexecuted instantiation: float jxl::N_SSE2::Symmetric5Border<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&) |
89 | | |
90 | | // Produces result for one vector's worth of pixels |
91 | | template <class WrapY> |
92 | | static void Symmetric5Interior(const ImageF& in, const int64_t ix, |
93 | | const int64_t rix, const int64_t iy, |
94 | | const WeightsSymmetric5& weights, |
95 | 0 | float* JXL_RESTRICT row_out) { |
96 | 0 | const HWY_FULL(float) d; |
97 | |
|
98 | 0 | const auto w0 = LoadDup128(d, weights.c); |
99 | 0 | const auto w1 = LoadDup128(d, weights.r); |
100 | 0 | const auto w2 = LoadDup128(d, weights.R); |
101 | 0 | const auto w4 = LoadDup128(d, weights.d); |
102 | 0 | const auto w5 = LoadDup128(d, weights.L); |
103 | 0 | const auto w8 = LoadDup128(d, weights.D); |
104 | |
|
105 | 0 | const size_t ysize = in.ysize(); |
106 | 0 | const WrapY wrap_y; |
107 | | // Unrolled loop over all 5 rows of the kernel. |
108 | 0 | auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2); |
109 | |
|
110 | 0 | sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 2, ysize, w2, w5, w8)); |
111 | 0 | auto sum1 = WeightedSum(in, wrap_y, ix, iy + 2, ysize, w2, w5, w8); |
112 | |
|
113 | 0 | sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5)); |
114 | 0 | sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5)); |
115 | |
|
116 | 0 | StoreU(Add(sum0, sum1), d, row_out + rix); |
117 | 0 | } Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE4::Symmetric5Interior<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*) Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE4::Symmetric5Interior<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*) Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_AVX2::Symmetric5Interior<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*) Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_AVX2::Symmetric5Interior<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*) Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE2::Symmetric5Interior<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*) Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE2::Symmetric5Interior<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*) |
118 | | |
119 | | template <class WrapY> |
120 | | static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy, |
121 | | const WeightsSymmetric5& weights, |
122 | 1.66k | float* JXL_RESTRICT row_out) { |
123 | 1.66k | const int64_t kRadius = 2; |
124 | 1.66k | const size_t xend = rect.x1(); |
125 | | |
126 | 1.66k | size_t rix = 0; |
127 | 1.66k | size_t ix = rect.x0(); |
128 | 1.66k | const HWY_FULL(float) d; |
129 | 1.66k | const size_t N = Lanes(d); |
130 | 1.66k | const size_t aligned_x = RoundUpTo(kRadius, N); |
131 | 14.4k | for (; ix < std::min(aligned_x, xend); ++ix, ++rix) { |
132 | 12.8k | row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); |
133 | 12.8k | } |
134 | 1.66k | for (; ix + N + kRadius <= xend; ix += N, rix += N) { |
135 | 0 | Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out); |
136 | 0 | } |
137 | 1.66k | for (; ix < xend; ++ix, ++rix) { |
138 | 0 | row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); |
139 | 0 | } |
140 | 1.66k | } Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE4::Symmetric5Row<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*) Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE4::Symmetric5Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*) convolve_symmetric5.cc:void jxl::N_AVX2::Symmetric5Row<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*) Line | Count | Source | 122 | 831 | float* JXL_RESTRICT row_out) { | 123 | 831 | const int64_t kRadius = 2; | 124 | 831 | const size_t xend = rect.x1(); | 125 | | | 126 | 831 | size_t rix = 0; | 127 | 831 | size_t ix = rect.x0(); | 128 | 831 | const HWY_FULL(float) d; | 129 | 831 | const size_t N = Lanes(d); | 130 | 831 | const size_t aligned_x = RoundUpTo(kRadius, N); | 131 | 7.28k | for (; ix < std::min(aligned_x, xend); ++ix, ++rix) { | 132 | 6.45k | row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); | 133 | 6.45k | } | 134 | 831 | for (; ix + N + kRadius <= xend; ix += N, rix += N) { | 135 | 0 | Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out); | 136 | 0 | } | 137 | 831 | for (; ix < xend; ++ix, ++rix) { | 138 | 0 | row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); | 139 | 0 | } | 140 | 831 | } |
convolve_symmetric5.cc:void jxl::N_AVX2::Symmetric5Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*) Line | Count | Source | 122 | 829 | float* JXL_RESTRICT row_out) { | 123 | 829 | const int64_t kRadius = 2; | 124 | 829 | const size_t xend = rect.x1(); | 125 | | | 126 | 829 | size_t rix = 0; | 127 | 829 | size_t ix = rect.x0(); | 128 | 829 | const HWY_FULL(float) d; | 129 | 829 | const size_t N = Lanes(d); | 130 | 829 | const size_t aligned_x = RoundUpTo(kRadius, N); | 131 | 7.19k | for (; ix < std::min(aligned_x, xend); ++ix, ++rix) { | 132 | 6.36k | row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); | 133 | 6.36k | } | 134 | 829 | for (; ix + N + kRadius <= xend; ix += N, rix += N) { | 135 | 0 | Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out); | 136 | 0 | } | 137 | 829 | for (; ix < xend; ++ix, ++rix) { | 138 | 0 | row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights); | 139 | 0 | } | 140 | 829 | } |
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE2::Symmetric5Row<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*) Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE2::Symmetric5Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*) |
141 | | |
142 | | // Semi-vectorized (interior pixels only); called directly like slow::, unlike |
143 | | // the fully vectorized strategies below. |
144 | | void Symmetric5(const ImageF& in, const Rect& in_rect, |
145 | | const WeightsSymmetric5& weights, ThreadPool* pool, |
146 | 208 | ImageF* JXL_RESTRICT out, const Rect& out_rect) { |
147 | 208 | JXL_ASSERT(in_rect.xsize() == out_rect.xsize()); |
148 | 208 | JXL_ASSERT(in_rect.ysize() == out_rect.ysize()); |
149 | 208 | const size_t ysize = in_rect.ysize(); |
150 | 208 | JXL_CHECK(RunOnPool( |
151 | 208 | pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit, |
152 | 208 | [&](const uint32_t task, size_t /*thread*/) { |
153 | 208 | const int64_t riy = task; |
154 | 208 | const int64_t iy = in_rect.y0() + riy; |
155 | | |
156 | 208 | if (iy < 2 || iy >= static_cast<ssize_t>(in.ysize()) - 2) { |
157 | 208 | Symmetric5Row<WrapMirror>(in, in_rect, iy, weights, |
158 | 208 | out_rect.Row(out, riy)); |
159 | 208 | } else { |
160 | 208 | Symmetric5Row<WrapUnchanged>(in, in_rect, iy, weights, |
161 | 208 | out_rect.Row(out, riy)); |
162 | 208 | } |
163 | 208 | }, |
164 | 208 | "Symmetric5x5Convolution")); |
165 | 208 | } Unexecuted instantiation: jxl::N_SSE4::Symmetric5(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSymmetric5 const&, jxl::ThreadPool*, jxl::Plane<float>*, jxl::RectT<unsigned long> const&) jxl::N_AVX2::Symmetric5(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSymmetric5 const&, jxl::ThreadPool*, jxl::Plane<float>*, jxl::RectT<unsigned long> const&) Line | Count | Source | 146 | 208 | ImageF* JXL_RESTRICT out, const Rect& out_rect) { | 147 | 208 | JXL_ASSERT(in_rect.xsize() == out_rect.xsize()); | 148 | 208 | JXL_ASSERT(in_rect.ysize() == out_rect.ysize()); | 149 | 208 | const size_t ysize = in_rect.ysize(); | 150 | 208 | JXL_CHECK(RunOnPool( | 151 | 208 | pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit, | 152 | 208 | [&](const uint32_t task, size_t /*thread*/) { | 153 | 208 | const int64_t riy = task; | 154 | 208 | const int64_t iy = in_rect.y0() + riy; | 155 | | | 156 | 208 | if (iy < 2 || iy >= static_cast<ssize_t>(in.ysize()) - 2) { | 157 | 208 | Symmetric5Row<WrapMirror>(in, in_rect, iy, weights, | 158 | 208 | out_rect.Row(out, riy)); | 159 | 208 | } else { | 160 | 208 | Symmetric5Row<WrapUnchanged>(in, in_rect, iy, weights, | 161 | 208 | out_rect.Row(out, riy)); | 162 | 208 | } | 163 | 208 | }, | 164 | 208 | "Symmetric5x5Convolution")); | 165 | 208 | } |
Unexecuted instantiation: jxl::N_SSE2::Symmetric5(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSymmetric5 const&, jxl::ThreadPool*, jxl::Plane<float>*, jxl::RectT<unsigned long> const&) |
166 | | |
167 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
168 | | } // namespace HWY_NAMESPACE |
169 | | } // namespace jxl |
170 | | HWY_AFTER_NAMESPACE(); |
171 | | |
172 | | #if HWY_ONCE |
173 | | namespace jxl { |
174 | | |
175 | | HWY_EXPORT(Symmetric5); |
176 | | void Symmetric5(const ImageF& in, const Rect& in_rect, |
177 | | const WeightsSymmetric5& weights, ThreadPool* pool, |
178 | 208 | ImageF* JXL_RESTRICT out, const Rect& out_rect) { |
179 | 208 | HWY_DYNAMIC_DISPATCH(Symmetric5)(in, in_rect, weights, pool, out, out_rect); |
180 | 208 | } |
181 | | |
182 | | void Symmetric5(const ImageF& in, const Rect& rect, |
183 | | const WeightsSymmetric5& weights, ThreadPool* pool, |
184 | 52 | ImageF* JXL_RESTRICT out) { |
185 | 52 | Symmetric5(in, rect, weights, pool, out, Rect(*out)); |
186 | 52 | } |
187 | | |
188 | | } // namespace jxl |
189 | | #endif // HWY_ONCE |