Coverage Report

Created: 2024-05-21 06:24

/src/libjxl/lib/jxl/convolve_symmetric5.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/convolve.h"
7
8
#undef HWY_TARGET_INCLUDE
9
#define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric5.cc"
10
#include <hwy/foreach_target.h>
11
#include <hwy/highway.h>
12
13
#include "lib/jxl/base/common.h"
14
#include "lib/jxl/base/rect.h"
15
#include "lib/jxl/convolve-inl.h"
16
17
HWY_BEFORE_NAMESPACE();
18
namespace jxl {
19
namespace HWY_NAMESPACE {
20
21
// These templates are not found via ADL.
22
using hwy::HWY_NAMESPACE::Add;
23
using hwy::HWY_NAMESPACE::Mul;
24
using hwy::HWY_NAMESPACE::Vec;
25
26
// Weighted sum of 1x5 pixels around ix, iy with [wx2 wx1 wx0 wx1 wx2].
27
template <class WrapY>
28
static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y,
29
                               const int64_t ix, const int64_t iy,
30
                               const size_t xsize, const size_t ysize,
31
                               const float wx0, const float wx1,
32
60.0k
                               const float wx2) {
33
60.0k
  const WrapMirror wrap_x;
34
60.0k
  const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize));
35
60.0k
  const float in_m2 = row[wrap_x(ix - 2, xsize)];
36
60.0k
  const float in_p2 = row[wrap_x(ix + 2, xsize)];
37
60.0k
  const float in_m1 = row[wrap_x(ix - 1, xsize)];
38
60.0k
  const float in_p1 = row[wrap_x(ix + 1, xsize)];
39
60.0k
  const float in_00 = row[ix];
40
60.0k
  const float sum_2 = wx2 * (in_m2 + in_p2);
41
60.0k
  const float sum_1 = wx1 * (in_m1 + in_p1);
42
60.0k
  const float sum_0 = wx0 * in_00;
43
60.0k
  return sum_2 + (sum_1 + sum_0);
44
60.0k
}
Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SSE4::WeightedSumBorder<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::WrapMirror, long, long, unsigned long, unsigned long, float, float, float)
Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SSE4::WeightedSumBorder<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::WrapUnchanged, long, long, unsigned long, unsigned long, float, float, float)
convolve_symmetric5.cc:float jxl::N_AVX2::WeightedSumBorder<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::WrapMirror, long, long, unsigned long, unsigned long, float, float, float)
Line
Count
Source
32
30.3k
                               const float wx2) {
33
30.3k
  const WrapMirror wrap_x;
34
30.3k
  const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize));
35
30.3k
  const float in_m2 = row[wrap_x(ix - 2, xsize)];
36
30.3k
  const float in_p2 = row[wrap_x(ix + 2, xsize)];
37
30.3k
  const float in_m1 = row[wrap_x(ix - 1, xsize)];
38
30.3k
  const float in_p1 = row[wrap_x(ix + 1, xsize)];
39
30.3k
  const float in_00 = row[ix];
40
30.3k
  const float sum_2 = wx2 * (in_m2 + in_p2);
41
30.3k
  const float sum_1 = wx1 * (in_m1 + in_p1);
42
30.3k
  const float sum_0 = wx0 * in_00;
43
30.3k
  return sum_2 + (sum_1 + sum_0);
44
30.3k
}
convolve_symmetric5.cc:float jxl::N_AVX2::WeightedSumBorder<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::WrapUnchanged, long, long, unsigned long, unsigned long, float, float, float)
Line
Count
Source
32
29.6k
                               const float wx2) {
33
29.6k
  const WrapMirror wrap_x;
34
29.6k
  const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize));
35
29.6k
  const float in_m2 = row[wrap_x(ix - 2, xsize)];
36
29.6k
  const float in_p2 = row[wrap_x(ix + 2, xsize)];
37
29.6k
  const float in_m1 = row[wrap_x(ix - 1, xsize)];
38
29.6k
  const float in_p1 = row[wrap_x(ix + 1, xsize)];
39
29.6k
  const float in_00 = row[ix];
40
29.6k
  const float sum_2 = wx2 * (in_m2 + in_p2);
41
29.6k
  const float sum_1 = wx1 * (in_m1 + in_p1);
42
29.6k
  const float sum_0 = wx0 * in_00;
43
29.6k
  return sum_2 + (sum_1 + sum_0);
44
29.6k
}
Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SSE2::WeightedSumBorder<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::WrapMirror, long, long, unsigned long, unsigned long, float, float, float)
Unexecuted instantiation: convolve_symmetric5.cc:float jxl::N_SSE2::WeightedSumBorder<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::WrapUnchanged, long, long, unsigned long, unsigned long, float, float, float)
45
46
template <class WrapY, class V>
47
static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
48
                     const int64_t iy, const size_t ysize, const V wx0,
49
0
                     const V wx1, const V wx2) {
50
0
  const HWY_FULL(float) d;
51
0
  const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix;
52
0
  const auto in_m2 = LoadU(d, center - 2);
53
0
  const auto in_p2 = LoadU(d, center + 2);
54
0
  const auto in_m1 = LoadU(d, center - 1);
55
0
  const auto in_p1 = LoadU(d, center + 1);
56
0
  const auto in_00 = LoadU(d, center);
57
0
  const auto sum_2 = Mul(wx2, Add(in_m2, in_p2));
58
0
  const auto sum_1 = Mul(wx1, Add(in_m1, in_p1));
59
0
  const auto sum_0 = Mul(wx0, in_00);
60
0
  return Add(sum_2, Add(sum_1, sum_0));
61
0
}
Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::WeightedSum<jxl::WrapMirror, hwy::N_SSE4::Vec128<float, 4ul> >(jxl::Plane<float> const&, jxl::WrapMirror, unsigned long, long, unsigned long, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>)
Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::WeightedSum<jxl::WrapUnchanged, hwy::N_SSE4::Vec128<float, 4ul> >(jxl::Plane<float> const&, jxl::WrapUnchanged, unsigned long, long, unsigned long, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>)
Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::WeightedSum<jxl::WrapMirror, hwy::N_AVX2::Vec256<float> >(jxl::Plane<float> const&, jxl::WrapMirror, unsigned long, long, unsigned long, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>)
Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::WeightedSum<jxl::WrapUnchanged, hwy::N_AVX2::Vec256<float> >(jxl::Plane<float> const&, jxl::WrapUnchanged, unsigned long, long, unsigned long, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>)
Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::WeightedSum<jxl::WrapMirror, hwy::N_SSE2::Vec128<float, 4ul> >(jxl::Plane<float> const&, jxl::WrapMirror, unsigned long, long, unsigned long, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>)
Unexecuted instantiation: convolve_symmetric5.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::WeightedSum<jxl::WrapUnchanged, hwy::N_SSE2::Vec128<float, 4ul> >(jxl::Plane<float> const&, jxl::WrapUnchanged, unsigned long, long, unsigned long, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>)
62
63
// Produces the result for one pixel; x coordinates are mirrored at the border.
64
template <class WrapY>
65
float Symmetric5Border(const ImageF& in, const int64_t ix, const int64_t iy,
66
12.8k
                       const WeightsSymmetric5& weights) {
67
12.8k
  const float w0 = weights.c[0];
68
12.8k
  const float w1 = weights.r[0];
69
12.8k
  const float w2 = weights.R[0];
70
12.8k
  const float w4 = weights.d[0];
71
12.8k
  const float w5 = weights.L[0];
72
12.8k
  const float w8 = weights.D[0];
73
74
12.8k
  const size_t xsize = in.xsize();
75
12.8k
  const size_t ysize = in.ysize();
76
12.8k
  const WrapY wrap_y;
77
  // Unrolled loop over all 5 rows of the kernel.
78
12.8k
  float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2);
79
80
12.8k
  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8);
81
12.8k
  float sum1 =
82
12.8k
      WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8);
83
84
12.8k
  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5);
85
12.8k
  sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5);
86
87
12.8k
  return sum0 + sum1;
88
12.8k
}
Unexecuted instantiation: float jxl::N_SSE4::Symmetric5Border<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&)
Unexecuted instantiation: float jxl::N_SSE4::Symmetric5Border<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&)
float jxl::N_AVX2::Symmetric5Border<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&)
Line
Count
Source
66
6.45k
                       const WeightsSymmetric5& weights) {
67
6.45k
  const float w0 = weights.c[0];
68
6.45k
  const float w1 = weights.r[0];
69
6.45k
  const float w2 = weights.R[0];
70
6.45k
  const float w4 = weights.d[0];
71
6.45k
  const float w5 = weights.L[0];
72
6.45k
  const float w8 = weights.D[0];
73
74
6.45k
  const size_t xsize = in.xsize();
75
6.45k
  const size_t ysize = in.ysize();
76
6.45k
  const WrapY wrap_y;
77
  // Unrolled loop over all 5 rows of the kernel.
78
6.45k
  float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2);
79
80
6.45k
  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8);
81
6.45k
  float sum1 =
82
6.45k
      WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8);
83
84
6.45k
  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5);
85
6.45k
  sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5);
86
87
6.45k
  return sum0 + sum1;
88
6.45k
}
float jxl::N_AVX2::Symmetric5Border<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&)
Line
Count
Source
66
6.36k
                       const WeightsSymmetric5& weights) {
67
6.36k
  const float w0 = weights.c[0];
68
6.36k
  const float w1 = weights.r[0];
69
6.36k
  const float w2 = weights.R[0];
70
6.36k
  const float w4 = weights.d[0];
71
6.36k
  const float w5 = weights.L[0];
72
6.36k
  const float w8 = weights.D[0];
73
74
6.36k
  const size_t xsize = in.xsize();
75
6.36k
  const size_t ysize = in.ysize();
76
6.36k
  const WrapY wrap_y;
77
  // Unrolled loop over all 5 rows of the kernel.
78
6.36k
  float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2);
79
80
6.36k
  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8);
81
6.36k
  float sum1 =
82
6.36k
      WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8);
83
84
6.36k
  sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5);
85
6.36k
  sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5);
86
87
6.36k
  return sum0 + sum1;
88
6.36k
}
Unexecuted instantiation: float jxl::N_SSE2::Symmetric5Border<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&)
Unexecuted instantiation: float jxl::N_SSE2::Symmetric5Border<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, jxl::WeightsSymmetric5 const&)
89
90
// Produces the result for one vector's worth of pixels (unaligned loads, no x wrapping).
91
template <class WrapY>
92
static void Symmetric5Interior(const ImageF& in, const int64_t ix,
93
                               const int64_t rix, const int64_t iy,
94
                               const WeightsSymmetric5& weights,
95
0
                               float* JXL_RESTRICT row_out) {
96
0
  const HWY_FULL(float) d;
97
98
0
  const auto w0 = LoadDup128(d, weights.c);
99
0
  const auto w1 = LoadDup128(d, weights.r);
100
0
  const auto w2 = LoadDup128(d, weights.R);
101
0
  const auto w4 = LoadDup128(d, weights.d);
102
0
  const auto w5 = LoadDup128(d, weights.L);
103
0
  const auto w8 = LoadDup128(d, weights.D);
104
105
0
  const size_t ysize = in.ysize();
106
0
  const WrapY wrap_y;
107
  // Unrolled loop over all 5 rows of the kernel.
108
0
  auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2);
109
110
0
  sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 2, ysize, w2, w5, w8));
111
0
  auto sum1 = WeightedSum(in, wrap_y, ix, iy + 2, ysize, w2, w5, w8);
112
113
0
  sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5));
114
0
  sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5));
115
116
0
  StoreU(Add(sum0, sum1), d, row_out + rix);
117
0
}
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE4::Symmetric5Interior<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*)
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE4::Symmetric5Interior<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*)
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_AVX2::Symmetric5Interior<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*)
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_AVX2::Symmetric5Interior<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*)
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE2::Symmetric5Interior<jxl::WrapMirror>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*)
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE2::Symmetric5Interior<jxl::WrapUnchanged>(jxl::Plane<float> const&, long, long, long, jxl::WeightsSymmetric5 const&, float*)
118
119
template <class WrapY>
120
static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy,
121
                          const WeightsSymmetric5& weights,
122
1.66k
                          float* JXL_RESTRICT row_out) {
123
1.66k
  const int64_t kRadius = 2;
124
1.66k
  const size_t xend = rect.x1();
125
126
1.66k
  size_t rix = 0;
127
1.66k
  size_t ix = rect.x0();
128
1.66k
  const HWY_FULL(float) d;
129
1.66k
  const size_t N = Lanes(d);
130
1.66k
  const size_t aligned_x = RoundUpTo(kRadius, N);
131
14.4k
  for (; ix < std::min(aligned_x, xend); ++ix, ++rix) {
132
12.8k
    row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
133
12.8k
  }
134
1.66k
  for (; ix + N + kRadius <= xend; ix += N, rix += N) {
135
0
    Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out);
136
0
  }
137
1.66k
  for (; ix < xend; ++ix, ++rix) {
138
0
    row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
139
0
  }
140
1.66k
}
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE4::Symmetric5Row<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*)
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE4::Symmetric5Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*)
convolve_symmetric5.cc:void jxl::N_AVX2::Symmetric5Row<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*)
Line
Count
Source
122
831
                          float* JXL_RESTRICT row_out) {
123
831
  const int64_t kRadius = 2;
124
831
  const size_t xend = rect.x1();
125
126
831
  size_t rix = 0;
127
831
  size_t ix = rect.x0();
128
831
  const HWY_FULL(float) d;
129
831
  const size_t N = Lanes(d);
130
831
  const size_t aligned_x = RoundUpTo(kRadius, N);
131
7.28k
  for (; ix < std::min(aligned_x, xend); ++ix, ++rix) {
132
6.45k
    row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
133
6.45k
  }
134
831
  for (; ix + N + kRadius <= xend; ix += N, rix += N) {
135
0
    Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out);
136
0
  }
137
831
  for (; ix < xend; ++ix, ++rix) {
138
0
    row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
139
0
  }
140
831
}
convolve_symmetric5.cc:void jxl::N_AVX2::Symmetric5Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*)
Line
Count
Source
122
829
                          float* JXL_RESTRICT row_out) {
123
829
  const int64_t kRadius = 2;
124
829
  const size_t xend = rect.x1();
125
126
829
  size_t rix = 0;
127
829
  size_t ix = rect.x0();
128
829
  const HWY_FULL(float) d;
129
829
  const size_t N = Lanes(d);
130
829
  const size_t aligned_x = RoundUpTo(kRadius, N);
131
7.19k
  for (; ix < std::min(aligned_x, xend); ++ix, ++rix) {
132
6.36k
    row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
133
6.36k
  }
134
829
  for (; ix + N + kRadius <= xend; ix += N, rix += N) {
135
0
    Symmetric5Interior<WrapY>(in, ix, rix, iy, weights, row_out);
136
0
  }
137
829
  for (; ix < xend; ++ix, ++rix) {
138
0
    row_out[rix] = Symmetric5Border<WrapY>(in, ix, iy, weights);
139
0
  }
140
829
}
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE2::Symmetric5Row<jxl::WrapMirror>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*)
Unexecuted instantiation: convolve_symmetric5.cc:void jxl::N_SSE2::Symmetric5Row<jxl::WrapUnchanged>(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, long, jxl::WeightsSymmetric5 const&, float*)
141
142
// Semi-vectorized (interior pixels only); called directly like slow::, unlike
143
// the fully vectorized strategies below.
144
void Symmetric5(const ImageF& in, const Rect& in_rect,
145
                const WeightsSymmetric5& weights, ThreadPool* pool,
146
208
                ImageF* JXL_RESTRICT out, const Rect& out_rect) {
147
208
  JXL_ASSERT(in_rect.xsize() == out_rect.xsize());
148
208
  JXL_ASSERT(in_rect.ysize() == out_rect.ysize());
149
208
  const size_t ysize = in_rect.ysize();
150
208
  JXL_CHECK(RunOnPool(
151
208
      pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
152
208
      [&](const uint32_t task, size_t /*thread*/) {
153
208
        const int64_t riy = task;
154
208
        const int64_t iy = in_rect.y0() + riy;
155
156
208
        if (iy < 2 || iy >= static_cast<ssize_t>(in.ysize()) - 2) {
157
208
          Symmetric5Row<WrapMirror>(in, in_rect, iy, weights,
158
208
                                    out_rect.Row(out, riy));
159
208
        } else {
160
208
          Symmetric5Row<WrapUnchanged>(in, in_rect, iy, weights,
161
208
                                       out_rect.Row(out, riy));
162
208
        }
163
208
      },
164
208
      "Symmetric5x5Convolution"));
165
208
}
Unexecuted instantiation: jxl::N_SSE4::Symmetric5(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSymmetric5 const&, jxl::ThreadPool*, jxl::Plane<float>*, jxl::RectT<unsigned long> const&)
jxl::N_AVX2::Symmetric5(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSymmetric5 const&, jxl::ThreadPool*, jxl::Plane<float>*, jxl::RectT<unsigned long> const&)
Line
Count
Source
146
208
                ImageF* JXL_RESTRICT out, const Rect& out_rect) {
147
208
  JXL_ASSERT(in_rect.xsize() == out_rect.xsize());
148
208
  JXL_ASSERT(in_rect.ysize() == out_rect.ysize());
149
208
  const size_t ysize = in_rect.ysize();
150
208
  JXL_CHECK(RunOnPool(
151
208
      pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
152
208
      [&](const uint32_t task, size_t /*thread*/) {
153
208
        const int64_t riy = task;
154
208
        const int64_t iy = in_rect.y0() + riy;
155
156
208
        if (iy < 2 || iy >= static_cast<ssize_t>(in.ysize()) - 2) {
157
208
          Symmetric5Row<WrapMirror>(in, in_rect, iy, weights,
158
208
                                    out_rect.Row(out, riy));
159
208
        } else {
160
208
          Symmetric5Row<WrapUnchanged>(in, in_rect, iy, weights,
161
208
                                       out_rect.Row(out, riy));
162
208
        }
163
208
      },
164
208
      "Symmetric5x5Convolution"));
165
208
}
Unexecuted instantiation: jxl::N_SSE2::Symmetric5(jxl::Plane<float> const&, jxl::RectT<unsigned long> const&, jxl::WeightsSymmetric5 const&, jxl::ThreadPool*, jxl::Plane<float>*, jxl::RectT<unsigned long> const&)
166
167
// NOLINTNEXTLINE(google-readability-namespace-comments)
168
}  // namespace HWY_NAMESPACE
169
}  // namespace jxl
170
HWY_AFTER_NAMESPACE();
171
172
#if HWY_ONCE
173
namespace jxl {
174
175
HWY_EXPORT(Symmetric5);
176
void Symmetric5(const ImageF& in, const Rect& in_rect,
177
                const WeightsSymmetric5& weights, ThreadPool* pool,
178
208
                ImageF* JXL_RESTRICT out, const Rect& out_rect) {
179
208
  HWY_DYNAMIC_DISPATCH(Symmetric5)(in, in_rect, weights, pool, out, out_rect);
180
208
}
181
182
void Symmetric5(const ImageF& in, const Rect& rect,
183
                const WeightsSymmetric5& weights, ThreadPool* pool,
184
52
                ImageF* JXL_RESTRICT out) {
185
52
  Symmetric5(in, rect, weights, pool, out, Rect(*out));
186
52
}
187
188
}  // namespace jxl
189
#endif  // HWY_ONCE