Coverage Report

Created: 2025-06-22 08:04

/src/libjxl/lib/jxl/convolve_symmetric5.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/status.h"
7
#include "lib/jxl/convolve.h"
8
9
#undef HWY_TARGET_INCLUDE
10
#define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric5.cc"
11
#include <hwy/foreach_target.h>
12
#include <hwy/highway.h>
13
14
#include "lib/jxl/base/common.h"
15
#include "lib/jxl/base/rect.h"
16
#include "lib/jxl/convolve-inl.h"
17
18
HWY_BEFORE_NAMESPACE();
19
namespace jxl {
20
namespace HWY_NAMESPACE {
21
22
// These templates are not found via ADL.
23
using hwy::HWY_NAMESPACE::Add;
24
using hwy::HWY_NAMESPACE::Mul;
25
using hwy::HWY_NAMESPACE::Vec;
26
27
// Weighted sum of 1x5 pixels around ix, iy with [wx2 wx1 wx0 wx1 wx2].
// Scalar path: the row is selected via wrap_y(iy, ysize) and the four
// horizontal neighbours are indexed through a WrapMirror functor,
// wrap_x(ix +/- k, xsize). Returns wx2*(m2+p2) + (wx1*(m1+p1) + wx0*center).
28
template <class WrapY>
29
static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y,
30
                               const int64_t ix, const int64_t iy,
31
                               const size_t xsize, const size_t ysize,
32
                               const float wx0, const float wx1,
33
0
                               const float wx2) {
34
0
  const WrapMirror wrap_x;
35
0
  const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize));
36
0
  const float in_m2 = row[wrap_x(ix - 2, xsize)];
37
0
  const float in_p2 = row[wrap_x(ix + 2, xsize)];
38
0
  const float in_m1 = row[wrap_x(ix - 1, xsize)];
39
0
  const float in_p1 = row[wrap_x(ix + 1, xsize)];
40
0
  // The center tap is read without wrapping; assumes ix is already a valid
  // column of `in` (not checked here).
  const float in_00 = row[ix];
41
0
  const float sum_2 = wx2 * (in_m2 + in_p2);
42
0
  const float sum_1 = wx1 * (in_m1 + in_p1);
43
0
  const float sum_0 = wx0 * in_00;
44
0
  return sum_2 + (sum_1 + sum_0);
45
0
}
Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SCALAR::WeightedSumBorder<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::WrapMirror, long, long, unsigned long, unsigned long, float, float, float)
Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SCALAR::WeightedSumBorder<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::WrapUnchanged, long, long, unsigned long, unsigned long, float, float, float)
46
47
// Vector counterpart of WeightedSumBorder: weighted sum of five horizontally
// shifted vectors centered at column ix of row wrap_y(iy, ysize), using the
// weights [wx2 wx1 wx0 wx1 wx2]. Only the row index goes through wrap_y; the
// x offsets (center - 2 .. center + 2) are loaded with LoadU and are NOT
// wrapped, so the caller is assumed to keep those loads inside the row.
template <class WrapY, class V>
48
static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
49
                     const int64_t iy, const size_t ysize, const V wx0,
50
0
                     const V wx1, const V wx2) {
51
0
  const HWY_FULL(float) d;
52
0
  const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix;
53
0
  const auto in_m2 = LoadU(d, center - 2);
54
0
  const auto in_p2 = LoadU(d, center + 2);
55
0
  const auto in_m1 = LoadU(d, center - 1);
56
0
  const auto in_p1 = LoadU(d, center + 1);
57
0
  const auto in_00 = LoadU(d, center);
58
0
  // Symmetric taps share a weight, so each pair is added before multiplying.
  const auto sum_2 = Mul(wx2, Add(in_m2, in_p2));
59
0
  const auto sum_1 = Mul(wx1, Add(in_m1, in_p1));
60
0
  const auto sum_0 = Mul(wx0, in_00);
61
0
  return Add(sum_2, Add(sum_1, sum_0));
62
0
}
Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SCALAR::Vec1<float> jxl::N_SCALAR::WeightedSum<jxl::WrapMirror, hwy::N_SCALAR::Vec1<float> >(jxl::Plane<float> const&, jxl::WrapMirror, unsigned long, long, unsigned long, hwy::N_SCALAR::Vec1<float>, hwy::N_SCALAR::Vec1<float>, hwy::N_SCALAR::Vec1<float>)
Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SCALAR::Vec1<float> jxl::N_SCALAR::WeightedSum<jxl::WrapUnchanged, hwy::N_SCALAR::Vec1<float> >(jxl::Plane<float> const&, jxl::WrapUnchanged, unsigned long, long, unsigned long, hwy::N_SCALAR::Vec1<float>, hwy::N_SCALAR::Vec1<float>, hwy::N_SCALAR::Vec1<float>)
63
64
// Produces result for one pixel
// Scalar 5x5 symmetric convolution at (ix, iy). Element 0 of each weight
// array in `weights` is used. The kernel rows are combined as:
//   row iy      : center w0, +/-1 w1, +/-2 w2
//   rows iy+/-1 : center w1, +/-1 w4, +/-2 w5
//   rows iy+/-2 : center w2, +/-1 w5, +/-2 w8
// Vertical indexing goes through WrapY; horizontal wrapping is handled inside
// WeightedSumBorder.
65
template <class WrapY>
66
float Symmetric5Border(const ImageF& in, const int64_t ix, const int64_t iy,
67
0
                       const WeightsSymmetric5& weights) {
68
0
  const float w0 = weights.c[0];
69
0
  const float w1 = weights.r[0];
70
0
  const float w2 = weights.R[0];
71
0
  const float w4 = weights.d[0];
72
0
  const float w5 = weights.L[0];
73
0
  const float w8 = weights.D[0];
74
75
0
  const size_t xsize = in.xsize();
76
0
  const size_t ysize = in.ysize();
77
0
  const WrapY wrap_y;
78
  // Unrolled loop over all 5 rows of the kernel.
  // Two accumulators (sum0, sum1) are kept and only added at the end.
79
0
  float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2);
80
81
0
  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8);
82
0
  float sum1 =
83
0
      WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8);
84
85
0
  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5);
86
0
  sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5);
87
88
0
  return sum0 + sum1;
89
0
}
Unexecuted instantiation: float jxl::N_SCALAR::Symmetric5Border<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&)
Unexecuted instantiation: float jxl::N_SCALAR::Symmetric5Border<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&)
90
91
// Produces result for one vector's worth of pixels
// Vector 5x5 symmetric convolution: reads input columns starting at ix (row
// iy and its four vertical neighbours via WrapY) and stores one vector of
// results to row_out + rix with an unaligned store. ix indexes the input
// image, rix indexes the output row. The per-row weight triples match
// Symmetric5Border: (w0,w1,w2), (w1,w4,w5) and (w2,w5,w8).
92
template <class WrapY>
93
static void Symmetric5Interior(const ImageF& in, const int64_t ix,
94
                               const int64_t rix, const int64_t iy,
95
                               const WeightsSymmetric5& weights,
96
0
                               float* JXL_RESTRICT row_out) {
97
0
  const HWY_FULL(float) d;
98
99
0
  // Each weight array is loaded into a full vector with LoadDup128.
  const auto w0 = LoadDup128(d, weights.c);
100
0
  const auto w1 = LoadDup128(d, weights.r);
101
0
  const auto w2 = LoadDup128(d, weights.R);
102
0
  const auto w4 = LoadDup128(d, weights.d);
103
0
  const auto w5 = LoadDup128(d, weights.L);
104
0
  const auto w8 = LoadDup128(d, weights.D);
105
106
0
  const size_t ysize = in.ysize();
107
0
  const WrapY wrap_y;
108
  // Unrolled loop over all 5 rows of the kernel.
109
0
  auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2);
110
111
0
  sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 2, ysize, w2, w5, w8));
112
0
  auto sum1 = WeightedSum(in, wrap_y, ix, iy + 2, ysize, w2, w5, w8);
113
114
0
  sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5));
115
0
  sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5));
116
117
0
  StoreU(Add(sum0, sum1), d, row_out + rix);
118
0
}
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SCALAR::Symmetric5Interior<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*)
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SCALAR::Symmetric5Interior<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*)
119
120
// Convolves one row: input columns [rect.x0(), rect.x1()) of row iy are
// written to row_out[0 .. rect.x1() - rect.x0()). The row is processed in
// three phases:
//   1. scalar Symmetric5Border until ix reaches min(aligned_x, xend);
//   2. vector Symmetric5Interior, N lanes at a time, while a full vector plus
//      the kernel radius still fits before xend;
//   3. scalar Symmetric5Border for whatever remains.
// WrapY selects the vertical wrapping policy used for all three phases.
template <class WrapY>
121
static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy,
122
                          const WeightsSymmetric5& weights,
123
0
                          float* JXL_RESTRICT row_out) {
124
0
  const int64_t kRadius = 2;
125
0
  const size_t xend = rect.x1();
126
127
0
  // rix walks the output row from 0; ix walks the input image from rect.x0().
  size_t rix = 0;
128
0
  size_t ix = rect.x0();
129
0
  const HWY_FULL(float) d;
130
0
  const size_t N = Lanes(d);
131
0
  // Absolute input column (not relative to rect.x0()); presumably kRadius
  // rounded up to a multiple of N - confirm against RoundUpTo.
  const size_t aligned_x = RoundUpTo(kRadius, N);
132
0
  for (; ix < std::min(aligned_x, xend); ++ix, ++rix) {
133
0
    row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
134
0
  }
135
0
  // The widest interior load starts at ix + kRadius and spans N lanes, hence
  // the ix + N + kRadius <= xend bound.
  for (; ix + N + kRadius <= xend; ix += N, rix += N) {
136
0
    Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out);
137
0
  }
138
0
  for (; ix < xend; ++ix, ++rix) {
139
0
    row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
140
0
  }
141
0
}
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SCALAR::Symmetric5Row<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*)
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SCALAR::Symmetric5Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*)
142
143
// Semi-vectorized (interior pixels only); called directly like slow::, unlike
143
// the fully vectorized strategies below.
// Convolves in_rect of `in` with the symmetric 5x5 kernel in `weights` and
// writes the result into out_rect of `out`. Both rects must have the same
// xsize and ysize (checked with JXL_ENSURE). Rows are processed as
// independent tasks through RunOnPool over [0, in_rect.ysize()).
145
Status Symmetric5(const ImageF& in, const Rect& in_rect,
146
                  const WeightsSymmetric5& weights, ThreadPool* pool,
147
0
                  ImageF* JXL_RESTRICT out, const Rect& out_rect) {
148
0
  JXL_ENSURE(in_rect.xsize() == out_rect.xsize());
149
0
  JXL_ENSURE(in_rect.ysize() == out_rect.ysize());
150
0
  const size_t ysize = in_rect.ysize();
151
0
  const auto process_row = [&](const uint32_t task,
152
0
                               size_t /*thread*/) -> Status {
153
0
    // riy is the row within the rects; iy is the absolute row in `in`.
    const int64_t riy = task;
154
0
    const int64_t iy = in_rect.y0() + riy;
155
156
0
    // Rows within two pixels of the top or bottom of the whole image `in`
    // (not of in_rect) use WrapMirror vertically; the rest use WrapUnchanged.
    if (iy < 2 || iy >= static_cast<ssize_t>(in.ysize()) - 2) {
157
0
      Symmetric5Row<WrapMirror>(in, in_rect, iy, weights,
158
0
                                out_rect.Row(out, riy));
159
0
    } else {
160
0
      Symmetric5Row<WrapUnchanged>(in, in_rect, iy, weights,
161
0
                                   out_rect.Row(out, riy));
162
0
    }
163
0
    return true;
164
0
  };
165
0
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
166
0
                                ThreadPool::NoInit, process_row,
167
0
                                "Symmetric5x5Convolution"));
168
0
  return true;
169
0
}
170
171
// NOLINTNEXTLINE(google-readability-namespace-comments)
172
}  // namespace HWY_NAMESPACE
173
}  // namespace jxl
174
HWY_AFTER_NAMESPACE();
175
176
#if HWY_ONCE
177
namespace jxl {
178
179
// Exports the per-target Symmetric5 implementations for dynamic dispatch.
HWY_EXPORT(Symmetric5);
180
// Entry point in namespace jxl: forwards all arguments unchanged to the
// implementation chosen by HWY_DYNAMIC_DISPATCH and returns its Status.
Status Symmetric5(const ImageF& in, const Rect& in_rect,
181
                  const WeightsSymmetric5& weights, ThreadPool* pool,
182
0
                  ImageF* JXL_RESTRICT out, const Rect& out_rect) {
183
0
  return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, in_rect, weights, pool, out,
184
0
                                          out_rect);
185
0
}
186
187
// Convenience overload: same as the six-argument Symmetric5 with the output
// rectangle set to Rect(*out).
Status Symmetric5(const ImageF& in, const Rect& rect,
188
                  const WeightsSymmetric5& weights, ThreadPool* pool,
189
0
                  ImageF* JXL_RESTRICT out) {
190
0
  return Symmetric5(in, rect, weights, pool, out, Rect(*out));
191
0
}
192
193
}  // namespace jxl
194
#endif  // HWY_ONCE