/src/libjxl/lib/jxl/butteraugli/butteraugli.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | // |
6 | | // Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com) |
7 | | // |
8 | | // The physical architecture of butteraugli is based on the following naming |
9 | | // convention: |
10 | | // * Opsin - dynamics of the photosensitive chemicals in the retina |
11 | | // with their immediate electrical processing |
12 | | // * Xyb - hybrid opponent/trichromatic color space |
13 | | // x is roughly red-subtract-green. |
14 | | // y is yellow. |
15 | | // b is blue. |
16 | | // Xyb values are computed from Opsin mixing, not directly from rgb. |
17 | | // * Mask - for visual masking |
18 | | // * Hf - color modeling for spatially high-frequency features |
19 | | // * Lf - color modeling for spatially low-frequency features |
20 | | // * Diffmap - to cluster and build an image of error between the images |
21 | | // * Blur - to hold the smoothing code |
22 | | |
23 | | #include "lib/jxl/butteraugli/butteraugli.h" |
24 | | |
25 | | #include <jxl/memory_manager.h> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <atomic> |
29 | | #include <cmath> |
30 | | #include <cstdint> |
31 | | #include <cstdio> |
32 | | #include <cstdlib> |
33 | | #include <cstring> |
34 | | #include <memory> |
35 | | |
36 | | #include "lib/jxl/base/common.h" |
37 | | #include "lib/jxl/base/compiler_specific.h" |
38 | | #include "lib/jxl/base/rect.h" |
39 | | #include "lib/jxl/convolve.h" |
40 | | #include "lib/jxl/image.h" |
41 | | |
42 | | #undef HWY_TARGET_INCLUDE |
43 | | #define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc" |
44 | | #include <hwy/foreach_target.h> |
45 | | |
46 | | #include "lib/jxl/base/fast_math-inl.h" |
47 | | #include "lib/jxl/base/status.h" |
48 | | #include "lib/jxl/image_ops.h" |
49 | | |
50 | | #if BUTTERAUGLI_ENABLE_CHECKS |
51 | | #include "lib/jxl/base/printf_macros.h" |
52 | | #endif |
53 | | |
54 | | #ifndef JXL_BUTTERAUGLI_ONCE |
55 | | #define JXL_BUTTERAUGLI_ONCE |
56 | | |
57 | | namespace jxl { |
58 | | |
59 | | static const double wMfMalta = 37.0819870399; |
60 | | static const double norm1Mf = 130262059.556; |
61 | | static const double wMfMaltaX = 8246.75321353; |
62 | | static const double norm1MfX = 1009002.70582; |
63 | | static const double wHfMalta = 18.7237414387; |
64 | | static const double norm1Hf = 4498534.45232; |
65 | | static const double wHfMaltaX = 6923.99476109; |
66 | | static const double norm1HfX = 8051.15833247; |
67 | | static const double wUhfMalta = 1.10039032555; |
68 | | static const double norm1Uhf = 71.7800275169; |
69 | | static const double wUhfMaltaX = 173.5; |
70 | | static const double norm1UhfX = 5.0; |
71 | | static const double wmul[9] = { |
72 | | 400.0, 1.50815703118, 0, |
73 | | 2150.0, 10.6195433239, 16.2176043152, |
74 | | 29.2353797994, 0.844626970982, 0.703646627719, |
75 | | }; |
76 | | |
77 | 0 | std::vector<float> ComputeKernel(float sigma) { |
78 | 0 | const float m = 2.25; // Accuracy increases when m is increased. |
79 | 0 | const double scaler = -1.0 / (2.0 * sigma * sigma); |
80 | 0 | const int diff = std::max<int>(1, m * std::fabs(sigma)); |
81 | 0 | std::vector<float> kernel(2 * diff + 1); |
82 | 0 | for (int i = -diff; i <= diff; ++i) { |
83 | 0 | kernel[i + diff] = std::exp(scaler * i * i); |
84 | 0 | } |
85 | 0 | return kernel; |
86 | 0 | } |
87 | | |
88 | | void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel, |
89 | 0 | const size_t x, float* BUTTERAUGLI_RESTRICT row_out) { |
90 | 0 | const size_t offset = kernel.size() / 2; |
91 | 0 | int minx = x < offset ? 0 : x - offset; |
92 | 0 | int maxx = std::min<int>(in.xsize() - 1, x + offset); |
93 | 0 | float weight = 0.0f; |
94 | 0 | for (int j = minx; j <= maxx; ++j) { |
95 | 0 | weight += kernel[j - x + offset]; |
96 | 0 | } |
97 | 0 | float scale = 1.0f / weight; |
98 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
99 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y); |
100 | 0 | float sum = 0.0f; |
101 | 0 | for (int j = minx; j <= maxx; ++j) { |
102 | 0 | sum += row_in[j] * kernel[j - x + offset]; |
103 | 0 | } |
104 | 0 | row_out[y] = sum * scale; |
105 | 0 | } |
106 | 0 | } |
107 | | |
108 | | // Computes a horizontal convolution and transposes the result. |
109 | | Status ConvolutionWithTranspose(const ImageF& in, |
110 | | const std::vector<float>& kernel, |
111 | 0 | ImageF* BUTTERAUGLI_RESTRICT out) { |
112 | 0 | JXL_ENSURE(out->xsize() == in.ysize()); |
113 | 0 | JXL_ENSURE(out->ysize() == in.xsize()); |
114 | 0 | const size_t len = kernel.size(); |
115 | 0 | const size_t offset = len / 2; |
116 | 0 | float weight_no_border = 0.0f; |
117 | 0 | for (size_t j = 0; j < len; ++j) { |
118 | 0 | weight_no_border += kernel[j]; |
119 | 0 | } |
120 | 0 | const float scale_no_border = 1.0f / weight_no_border; |
121 | 0 | const size_t border1 = std::min(in.xsize(), offset); |
122 | 0 | const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0; |
123 | 0 | std::vector<float> scaled_kernel(len / 2 + 1); |
124 | 0 | for (size_t i = 0; i <= len / 2; ++i) { |
125 | 0 | scaled_kernel[i] = kernel[i] * scale_no_border; |
126 | 0 | } |
127 | | |
128 | | // middle |
129 | 0 | switch (len) { |
130 | 0 | case 7: { |
131 | 0 | const float sk0 = scaled_kernel[0]; |
132 | 0 | const float sk1 = scaled_kernel[1]; |
133 | 0 | const float sk2 = scaled_kernel[2]; |
134 | 0 | const float sk3 = scaled_kernel[3]; |
135 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
136 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
137 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
138 | 0 | const float sum0 = (row_in[0] + row_in[6]) * sk0; |
139 | 0 | const float sum1 = (row_in[1] + row_in[5]) * sk1; |
140 | 0 | const float sum2 = (row_in[2] + row_in[4]) * sk2; |
141 | 0 | const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2; |
142 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
143 | 0 | row_out[y] = sum; |
144 | 0 | } |
145 | 0 | } |
146 | 0 | } break; |
147 | 0 | case 13: { |
148 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
149 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
150 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
151 | 0 | float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0]; |
152 | 0 | float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1]; |
153 | 0 | float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2]; |
154 | 0 | float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3]; |
155 | 0 | sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4]; |
156 | 0 | sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5]; |
157 | 0 | const float sum = (row_in[6]) * scaled_kernel[6]; |
158 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
159 | 0 | row_out[y] = sum + sum0 + sum1 + sum2 + sum3; |
160 | 0 | } |
161 | 0 | } |
162 | 0 | break; |
163 | 0 | } |
164 | 0 | case 15: { |
165 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
166 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
167 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
168 | 0 | float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0]; |
169 | 0 | float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1]; |
170 | 0 | float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2]; |
171 | 0 | float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3]; |
172 | 0 | sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4]; |
173 | 0 | sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5]; |
174 | 0 | sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6]; |
175 | 0 | const float sum = (row_in[7]) * scaled_kernel[7]; |
176 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
177 | 0 | row_out[y] = sum + sum0 + sum1 + sum2 + sum3; |
178 | 0 | } |
179 | 0 | } |
180 | 0 | break; |
181 | 0 | } |
182 | 0 | case 33: { |
183 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
184 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
185 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
186 | 0 | float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0]; |
187 | 0 | float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1]; |
188 | 0 | float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2]; |
189 | 0 | float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3]; |
190 | 0 | sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4]; |
191 | 0 | sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5]; |
192 | 0 | sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6]; |
193 | 0 | sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7]; |
194 | 0 | sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8]; |
195 | 0 | sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9]; |
196 | 0 | sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10]; |
197 | 0 | sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11]; |
198 | 0 | sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12]; |
199 | 0 | sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13]; |
200 | 0 | sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14]; |
201 | 0 | sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15]; |
202 | 0 | const float sum = (row_in[16]) * scaled_kernel[16]; |
203 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
204 | 0 | row_out[y] = sum + sum0 + sum1 + sum2 + sum3; |
205 | 0 | } |
206 | 0 | } |
207 | 0 | break; |
208 | 0 | } |
209 | 0 | default: |
210 | 0 | return JXL_UNREACHABLE("kernel size %d not implemented", |
211 | 0 | static_cast<int>(len)); |
212 | 0 | } |
213 | | // left border |
214 | 0 | for (size_t x = 0; x < border1; ++x) { |
215 | 0 | ConvolveBorderColumn(in, kernel, x, out->Row(x)); |
216 | 0 | } |
217 | | |
218 | | // right border |
219 | 0 | for (size_t x = border2; x < in.xsize(); ++x) { |
220 | 0 | ConvolveBorderColumn(in, kernel, x, out->Row(x)); |
221 | 0 | } |
222 | 0 | return true; |
223 | 0 | } |
224 | | |
225 | | // A blur somewhat similar to a 2D Gaussian blur. |
226 | | // See: https://en.wikipedia.org/wiki/Gaussian_blur |
227 | | // |
228 | | // This is a bottleneck because the sigma can be quite large (>7). We can use |
229 | | // gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated |
230 | | // Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are |
231 | | // zero-valued. This leads to noticeable differences at the edges of diffmaps. |
232 | | // We retain a special case for 5x5 kernels (even faster than gauss_blur), |
233 | | // optionally use gauss_blur followed by fixup of the borders for large images, |
234 | | // or fall back to the previous truncated FIR followed by a transpose. |
235 | | Status Blur(const ImageF& in, float sigma, const ButteraugliParams& params, |
236 | 0 | BlurTemp* temp, ImageF* out) { |
237 | 0 | std::vector<float> kernel = ComputeKernel(sigma); |
238 | | // Separable5 does an in-place convolution, so this fast path is not safe if |
239 | | // in aliases out. |
240 | 0 | if (kernel.size() == 5 && &in != out) { |
241 | 0 | float sum_weights = 0.0f; |
242 | 0 | for (const float w : kernel) { |
243 | 0 | sum_weights += w; |
244 | 0 | } |
245 | 0 | const float scale = 1.0f / sum_weights; |
246 | 0 | const float w0 = kernel[2] * scale; |
247 | 0 | const float w1 = kernel[1] * scale; |
248 | 0 | const float w2 = kernel[0] * scale; |
249 | 0 | const WeightsSeparable5 weights = { |
250 | 0 | {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, |
251 | 0 | {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, |
252 | 0 | }; |
253 | 0 | JXL_RETURN_IF_ERROR( |
254 | 0 | Separable5(in, Rect(in), weights, /*pool=*/nullptr, out)); |
255 | 0 | return true; |
256 | 0 | } |
257 | | |
258 | 0 | ImageF* temp_t; |
259 | 0 | JXL_RETURN_IF_ERROR(temp->GetTransposed(in, &temp_t)); |
260 | 0 | JXL_RETURN_IF_ERROR(ConvolutionWithTranspose(in, kernel, temp_t)); |
261 | 0 | JXL_RETURN_IF_ERROR(ConvolutionWithTranspose(*temp_t, kernel, out)); |
262 | 0 | return true; |
263 | 0 | } |
264 | | |
265 | | // Allows PaddedMaltaUnit to call either function via overloading. |
266 | | struct MaltaTagLF {}; |
267 | | struct MaltaTag {}; |
268 | | |
269 | | } // namespace jxl |
270 | | |
271 | | #endif // JXL_BUTTERAUGLI_ONCE |
272 | | |
273 | | #include <hwy/highway.h> |
274 | | HWY_BEFORE_NAMESPACE(); |
275 | | namespace jxl { |
276 | | namespace HWY_NAMESPACE { |
277 | | |
278 | | // These templates are not found via ADL. |
279 | | using hwy::HWY_NAMESPACE::Abs; |
280 | | using hwy::HWY_NAMESPACE::Div; |
281 | | using hwy::HWY_NAMESPACE::Gt; |
282 | | using hwy::HWY_NAMESPACE::IfThenElse; |
283 | | using hwy::HWY_NAMESPACE::IfThenElseZero; |
284 | | using hwy::HWY_NAMESPACE::Lt; |
285 | | using hwy::HWY_NAMESPACE::Max; |
286 | | using hwy::HWY_NAMESPACE::Mul; |
287 | | using hwy::HWY_NAMESPACE::MulAdd; |
288 | | using hwy::HWY_NAMESPACE::MulSub; |
289 | | using hwy::HWY_NAMESPACE::Neg; |
290 | | using hwy::HWY_NAMESPACE::Sub; |
291 | | using hwy::HWY_NAMESPACE::Vec; |
292 | | using hwy::HWY_NAMESPACE::ZeroIfNegative; |
293 | | |
294 | | template <class D, class V> |
295 | 0 | HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) { |
296 | 0 | static const double kMul = 0.724216145665; |
297 | 0 | const V mul = Set(d, kMul); |
298 | 0 | const V maxval = Set(d, kMaxVal); |
299 | | // If greater than maxval or less than -maxval, replace with if_*. |
300 | 0 | const V if_pos = MulAdd(Sub(v, maxval), mul, maxval); |
301 | 0 | const V if_neg = MulSub(Add(v, maxval), mul, maxval); |
302 | 0 | const V pos_or_v = IfThenElse(Ge(v, maxval), if_pos, v); |
303 | 0 | return IfThenElse(Lt(v, Neg(maxval)), if_neg, pos_or_v); |
304 | 0 | } Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::MaximumClamp<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, double) Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::MaximumClamp<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, double) Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::MaximumClamp<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, double) |
305 | | |
306 | | // Make area around zero less important (remove it). |
307 | | template <class D, class V> |
308 | 0 | HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) { |
309 | 0 | const auto w = Set(d, kw); |
310 | 0 | return IfThenElse(Gt(x, w), Sub(x, w), |
311 | 0 | IfThenElseZero(Lt(x, Neg(w)), Add(x, w))); |
312 | 0 | } Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::RemoveRangeAroundZero<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, double, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::RemoveRangeAroundZero<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, double, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::RemoveRangeAroundZero<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, double, hwy::N_SSE2::Vec128<float, 4ul>) |
313 | | |
314 | | // Make area around zero more important (2x it until the limit). |
315 | | template <class D, class V> |
316 | 0 | HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) { |
317 | 0 | const auto w = Set(d, kw); |
318 | 0 | return IfThenElse(Gt(x, w), Add(x, w), |
319 | 0 | IfThenElse(Lt(x, Neg(w)), Sub(x, w), Add(x, x))); |
320 | 0 | } Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::AmplifyRangeAroundZero<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, double, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::AmplifyRangeAroundZero<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, double, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::AmplifyRangeAroundZero<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, double, hwy::N_SSE2::Vec128<float, 4ul>) |
321 | | |
322 | | // XybLowFreqToVals converts from low-frequency XYB space to the 'vals' space. |
323 | | // Vals space can be converted to L2-norm space (Euclidean and normalized) |
324 | | // through visual masking. |
325 | | template <class D, class V> |
326 | | HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y, |
327 | | const V& b_arg, V* HWY_RESTRICT valx, |
328 | 0 | V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) { |
329 | 0 | static const double xmul_scalar = 33.832837186260; |
330 | 0 | static const double ymul_scalar = 14.458268100570; |
331 | 0 | static const double bmul_scalar = 49.87984651440; |
332 | 0 | static const double y_to_b_mul_scalar = -0.362267051518; |
333 | 0 | const V xmul = Set(d, xmul_scalar); |
334 | 0 | const V ymul = Set(d, ymul_scalar); |
335 | 0 | const V bmul = Set(d, bmul_scalar); |
336 | 0 | const V y_to_b_mul = Set(d, y_to_b_mul_scalar); |
337 | 0 | const V b = MulAdd(y_to_b_mul, y, b_arg); |
338 | 0 | *valb = Mul(b, bmul); |
339 | 0 | *valx = Mul(x, xmul); |
340 | 0 | *valy = Mul(y, ymul); |
341 | 0 | } Unexecuted instantiation: void jxl::N_SSE4::XybLowFreqToVals<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*) Unexecuted instantiation: void jxl::N_AVX2::XybLowFreqToVals<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*) Unexecuted instantiation: void jxl::N_SSE2::XybLowFreqToVals<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*) |
342 | | |
343 | 0 | void XybLowFreqToVals(Image3F* xyb_lf) { |
344 | | // Modify range around zero code only concerns the high frequency |
345 | | // planes and only the X and Y channels. |
346 | | // Convert low freq xyb to vals space so that we can do a simple squared sum |
347 | | // diff on the low frequencies later. |
348 | 0 | const HWY_FULL(float) d; |
349 | 0 | for (size_t y = 0; y < xyb_lf->ysize(); ++y) { |
350 | 0 | float* BUTTERAUGLI_RESTRICT row_x = xyb_lf->PlaneRow(0, y); |
351 | 0 | float* BUTTERAUGLI_RESTRICT row_y = xyb_lf->PlaneRow(1, y); |
352 | 0 | float* BUTTERAUGLI_RESTRICT row_b = xyb_lf->PlaneRow(2, y); |
353 | 0 | for (size_t x = 0; x < xyb_lf->xsize(); x += Lanes(d)) { |
354 | 0 | auto valx = Undefined(d); |
355 | 0 | auto valy = Undefined(d); |
356 | 0 | auto valb = Undefined(d); |
357 | 0 | XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x), |
358 | 0 | Load(d, row_b + x), &valx, &valy, &valb); |
359 | 0 | Store(valx, d, row_x + x); |
360 | 0 | Store(valy, d, row_y + x); |
361 | 0 | Store(valb, d, row_b + x); |
362 | 0 | } |
363 | 0 | } |
364 | 0 | } Unexecuted instantiation: jxl::N_SSE4::XybLowFreqToVals(jxl::Image3<float>*) Unexecuted instantiation: jxl::N_AVX2::XybLowFreqToVals(jxl::Image3<float>*) Unexecuted instantiation: jxl::N_SSE2::XybLowFreqToVals(jxl::Image3<float>*) |
365 | | |
366 | 0 | Status SuppressXByY(const ImageF& in_y, ImageF* HWY_RESTRICT inout_x) { |
367 | 0 | JXL_ENSURE(SameSize(*inout_x, in_y)); |
368 | 0 | const size_t xsize = in_y.xsize(); |
369 | 0 | const size_t ysize = in_y.ysize(); |
370 | 0 | const HWY_FULL(float) d; |
371 | 0 | static const double suppress = 46.0; |
372 | 0 | static const double s = 0.653020556257; |
373 | 0 | const auto sv = Set(d, s); |
374 | 0 | const auto one_minus_s = Set(d, 1.0 - s); |
375 | 0 | const auto ywv = Set(d, suppress); |
376 | |
|
377 | 0 | for (size_t y = 0; y < ysize; ++y) { |
378 | 0 | const float* HWY_RESTRICT row_y = in_y.ConstRow(y); |
379 | 0 | float* HWY_RESTRICT row_x = inout_x->Row(y); |
380 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
381 | 0 | const auto vx = Load(d, row_x + x); |
382 | 0 | const auto vy = Load(d, row_y + x); |
383 | 0 | const auto scaler = |
384 | 0 | MulAdd(Div(ywv, MulAdd(vy, vy, ywv)), one_minus_s, sv); |
385 | 0 | Store(Mul(scaler, vx), d, row_x + x); |
386 | 0 | } |
387 | 0 | } |
388 | 0 | return true; |
389 | 0 | } Unexecuted instantiation: jxl::N_SSE4::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float>*) |
390 | | |
391 | 0 | void Subtract(const ImageF& a, const ImageF& b, ImageF* c) { |
392 | 0 | const HWY_FULL(float) d; |
393 | 0 | for (size_t y = 0; y < a.ysize(); ++y) { |
394 | 0 | const float* row_a = a.ConstRow(y); |
395 | 0 | const float* row_b = b.ConstRow(y); |
396 | 0 | float* row_c = c->Row(y); |
397 | 0 | for (size_t x = 0; x < a.xsize(); x += Lanes(d)) { |
398 | 0 | Store(Sub(Load(d, row_a + x), Load(d, row_b + x)), d, row_c + x); |
399 | 0 | } |
400 | 0 | } |
401 | 0 | } Unexecuted instantiation: jxl::N_SSE4::Subtract(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::Subtract(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::Subtract(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*) |
402 | | |
403 | | Status SeparateLFAndMF(const ButteraugliParams& params, const Image3F& xyb, |
404 | 0 | Image3F* lf, Image3F* mf, BlurTemp* blur_temp) { |
405 | 0 | static const double kSigmaLf = 7.15593339443; |
406 | 0 | for (int i = 0; i < 3; ++i) { |
407 | | // Extract lf ... |
408 | 0 | JXL_RETURN_IF_ERROR( |
409 | 0 | Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &lf->Plane(i))); |
410 | | // ... and keep everything else in mf. |
411 | 0 | Subtract(xyb.Plane(i), lf->Plane(i), &mf->Plane(i)); |
412 | 0 | } |
413 | 0 | XybLowFreqToVals(lf); |
414 | 0 | return true; |
415 | 0 | } Unexecuted instantiation: jxl::N_SSE4::SeparateLFAndMF(jxl::ButteraugliParams const&, jxl::Image3<float> const&, jxl::Image3<float>*, jxl::Image3<float>*, jxl::BlurTemp*) Unexecuted instantiation: jxl::N_AVX2::SeparateLFAndMF(jxl::ButteraugliParams const&, jxl::Image3<float> const&, jxl::Image3<float>*, jxl::Image3<float>*, jxl::BlurTemp*) Unexecuted instantiation: jxl::N_SSE2::SeparateLFAndMF(jxl::ButteraugliParams const&, jxl::Image3<float> const&, jxl::Image3<float>*, jxl::Image3<float>*, jxl::BlurTemp*) |
416 | | |
417 | | Status SeparateMFAndHF(const ButteraugliParams& params, Image3F* mf, ImageF* hf, |
418 | 0 | BlurTemp* blur_temp) { |
419 | 0 | const HWY_FULL(float) d; |
420 | 0 | static const double kSigmaHf = 3.22489901262; |
421 | 0 | const size_t xsize = mf->xsize(); |
422 | 0 | const size_t ysize = mf->ysize(); |
423 | 0 | JxlMemoryManager* memory_manager = mf[0].memory_manager(); |
424 | 0 | JXL_ASSIGN_OR_RETURN(hf[0], ImageF::Create(memory_manager, xsize, ysize)); |
425 | 0 | JXL_ASSIGN_OR_RETURN(hf[1], ImageF::Create(memory_manager, xsize, ysize)); |
426 | 0 | for (int i = 0; i < 3; ++i) { |
427 | 0 | if (i == 2) { |
428 | 0 | JXL_RETURN_IF_ERROR( |
429 | 0 | Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i))); |
430 | 0 | break; |
431 | 0 | } |
432 | 0 | for (size_t y = 0; y < ysize; ++y) { |
433 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(i, y); |
434 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y); |
435 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
436 | 0 | Store(Load(d, row_mf + x), d, row_hf + x); |
437 | 0 | } |
438 | 0 | } |
439 | 0 | JXL_RETURN_IF_ERROR( |
440 | 0 | Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i))); |
441 | 0 | static const double kRemoveMfRange = 0.29; |
442 | 0 | static const double kAddMfRange = 0.1; |
443 | 0 | if (i == 0) { |
444 | 0 | for (size_t y = 0; y < ysize; ++y) { |
445 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(0, y); |
446 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y); |
447 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
448 | 0 | auto mfv = Load(d, row_mf + x); |
449 | 0 | auto hfv = Sub(Load(d, row_hf + x), mfv); |
450 | 0 | mfv = RemoveRangeAroundZero(d, kRemoveMfRange, mfv); |
451 | 0 | Store(mfv, d, row_mf + x); |
452 | 0 | Store(hfv, d, row_hf + x); |
453 | 0 | } |
454 | 0 | } |
455 | 0 | } else { |
456 | 0 | for (size_t y = 0; y < ysize; ++y) { |
457 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(1, y); |
458 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y); |
459 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
460 | 0 | auto mfv = Load(d, row_mf + x); |
461 | 0 | auto hfv = Sub(Load(d, row_hf + x), mfv); |
462 | |
|
463 | 0 | mfv = AmplifyRangeAroundZero(d, kAddMfRange, mfv); |
464 | 0 | Store(mfv, d, row_mf + x); |
465 | 0 | Store(hfv, d, row_hf + x); |
466 | 0 | } |
467 | 0 | } |
468 | 0 | } |
469 | 0 | } |
470 | | // Suppress red-green by intensity change in the high freq channels. |
471 | 0 | JXL_RETURN_IF_ERROR(SuppressXByY(hf[1], &hf[0])); |
472 | 0 | return true; |
473 | 0 | } Unexecuted instantiation: jxl::N_SSE4::SeparateMFAndHF(jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::Plane<float>*, jxl::BlurTemp*) Unexecuted instantiation: jxl::N_AVX2::SeparateMFAndHF(jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::Plane<float>*, jxl::BlurTemp*) Unexecuted instantiation: jxl::N_SSE2::SeparateMFAndHF(jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::Plane<float>*, jxl::BlurTemp*) |
474 | | |
475 | | Status SeparateHFAndUHF(const ButteraugliParams& params, ImageF* hf, |
476 | 0 | ImageF* uhf, BlurTemp* blur_temp) { |
477 | 0 | const HWY_FULL(float) d; |
478 | 0 | const size_t xsize = hf[0].xsize(); |
479 | 0 | const size_t ysize = hf[0].ysize(); |
480 | 0 | JxlMemoryManager* memory_manager = hf[0].memory_manager(); |
481 | 0 | static const double kSigmaUhf = 1.56416327805; |
482 | 0 | JXL_ASSIGN_OR_RETURN(uhf[0], ImageF::Create(memory_manager, xsize, ysize)); |
483 | 0 | JXL_ASSIGN_OR_RETURN(uhf[1], ImageF::Create(memory_manager, xsize, ysize)); |
484 | 0 | for (int i = 0; i < 2; ++i) { |
485 | | // Divide hf into hf and uhf. |
486 | 0 | for (size_t y = 0; y < ysize; ++y) { |
487 | 0 | float* BUTTERAUGLI_RESTRICT row_uhf = uhf[i].Row(y); |
488 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y); |
489 | 0 | for (size_t x = 0; x < xsize; ++x) { |
490 | 0 | row_uhf[x] = row_hf[x]; |
491 | 0 | } |
492 | 0 | } |
493 | 0 | JXL_RETURN_IF_ERROR(Blur(hf[i], kSigmaUhf, params, blur_temp, &hf[i])); |
494 | 0 | static const double kRemoveHfRange = 1.5; |
495 | 0 | static const double kAddHfRange = 0.132; |
496 | 0 | static const double kRemoveUhfRange = 0.04; |
497 | 0 | static const double kMaxclampHf = 28.4691806922; |
498 | 0 | static const double kMaxclampUhf = 5.19175294647; |
499 | 0 | static double kMulYHf = 2.155; |
500 | 0 | static double kMulYUhf = 2.69313763794; |
501 | 0 | if (i == 0) { |
502 | 0 | for (size_t y = 0; y < ysize; ++y) { |
503 | 0 | float* BUTTERAUGLI_RESTRICT row_uhf = uhf[0].Row(y); |
504 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y); |
505 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
506 | 0 | auto hfv = Load(d, row_hf + x); |
507 | 0 | auto uhfv = Sub(Load(d, row_uhf + x), hfv); |
508 | 0 | hfv = RemoveRangeAroundZero(d, kRemoveHfRange, hfv); |
509 | 0 | uhfv = RemoveRangeAroundZero(d, kRemoveUhfRange, uhfv); |
510 | 0 | Store(hfv, d, row_hf + x); |
511 | 0 | Store(uhfv, d, row_uhf + x); |
512 | 0 | } |
513 | 0 | } |
514 | 0 | } else { |
515 | 0 | for (size_t y = 0; y < ysize; ++y) { |
516 | 0 | float* BUTTERAUGLI_RESTRICT row_uhf = uhf[1].Row(y); |
517 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y); |
518 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
519 | 0 | auto hfv = Load(d, row_hf + x); |
520 | 0 | hfv = MaximumClamp(d, hfv, kMaxclampHf); |
521 | |
|
522 | 0 | auto uhfv = Sub(Load(d, row_uhf + x), hfv); |
523 | 0 | uhfv = MaximumClamp(d, uhfv, kMaxclampUhf); |
524 | 0 | uhfv = Mul(uhfv, Set(d, kMulYUhf)); |
525 | 0 | Store(uhfv, d, row_uhf + x); |
526 | |
|
527 | 0 | hfv = Mul(hfv, Set(d, kMulYHf)); |
528 | 0 | hfv = AmplifyRangeAroundZero(d, kAddHfRange, hfv); |
529 | 0 | Store(hfv, d, row_hf + x); |
530 | 0 | } |
531 | 0 | } |
532 | 0 | } |
533 | 0 | } |
534 | 0 | return true; |
535 | 0 | } Unexecuted instantiation: jxl::N_SSE4::SeparateHFAndUHF(jxl::ButteraugliParams const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::BlurTemp*) Unexecuted instantiation: jxl::N_AVX2::SeparateHFAndUHF(jxl::ButteraugliParams const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::BlurTemp*) Unexecuted instantiation: jxl::N_SSE2::SeparateHFAndUHF(jxl::ButteraugliParams const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::BlurTemp*) |
536 | | |
537 | 0 | void DeallocateHFAndUHF(ImageF* hf, ImageF* uhf) { |
538 | 0 | for (int i = 0; i < 2; ++i) { |
539 | 0 | hf[i] = ImageF(); |
540 | 0 | uhf[i] = ImageF(); |
541 | 0 | } |
542 | 0 | } Unexecuted instantiation: jxl::N_SSE4::DeallocateHFAndUHF(jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::DeallocateHFAndUHF(jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::DeallocateHFAndUHF(jxl::Plane<float>*, jxl::Plane<float>*) |
543 | | |
544 | | Status SeparateFrequencies(size_t xsize, size_t ysize, |
545 | | const ButteraugliParams& params, BlurTemp* blur_temp, |
546 | 0 | const Image3F& xyb, PsychoImage& ps) { |
547 | 0 | JxlMemoryManager* memory_manager = xyb.memory_manager(); |
548 | 0 | JXL_ASSIGN_OR_RETURN( |
549 | 0 | ps.lf, Image3F::Create(memory_manager, xyb.xsize(), xyb.ysize())); |
550 | 0 | JXL_ASSIGN_OR_RETURN( |
551 | 0 | ps.mf, Image3F::Create(memory_manager, xyb.xsize(), xyb.ysize())); |
552 | 0 | JXL_RETURN_IF_ERROR(SeparateLFAndMF(params, xyb, &ps.lf, &ps.mf, blur_temp)); |
553 | 0 | JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &ps.mf, &ps.hf[0], blur_temp)); |
554 | 0 | JXL_RETURN_IF_ERROR( |
555 | 0 | SeparateHFAndUHF(params, &ps.hf[0], &ps.uhf[0], blur_temp)); |
556 | 0 | return true; |
557 | 0 | } Unexecuted instantiation: jxl::N_SSE4::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&) Unexecuted instantiation: jxl::N_AVX2::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&) Unexecuted instantiation: jxl::N_SSE2::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&) |
558 | | |
559 | | namespace { |
560 | | template <typename V> |
561 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d) { |
562 | 0 | return Add(Add(a, b), Add(c, d)); |
563 | 0 | } Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 1ul> >(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) |
564 | | template <typename V> |
565 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e) { |
566 | 0 | return Sum(a, b, c, Add(d, e)); |
567 | 0 | } Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 1ul> >(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) |
568 | | template <typename V> |
569 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g) { |
570 | 0 | return Sum(a, b, c, Sum(d, e, f, g)); |
571 | 0 | } Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 1ul> >(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) |
572 | | template <typename V> |
573 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g, V h, V i) { |
574 | 0 | return Add(Add(Sum(a, b, c, d), Sum(e, f, g, h)), i); |
575 | 0 | } Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 1ul> >(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>) |
576 | | } // namespace |
577 | | |
578 | | template <class D> |
579 | | Vec<D> MaltaUnit(MaltaTagLF /*tag*/, const D df, |
580 | 0 | const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { |
581 | 0 | const intptr_t xs3 = 3 * xs; |
582 | |
|
583 | 0 | const auto center = LoadU(df, d); |
584 | | |
585 | | // x grows, y constant |
586 | 0 | const auto sum_yconst = Sum(LoadU(df, d - 4), LoadU(df, d - 2), center, |
587 | 0 | LoadU(df, d + 2), LoadU(df, d + 4)); |
588 | | // Will return this, sum of all line kernels |
589 | 0 | auto retval = Mul(sum_yconst, sum_yconst); |
590 | 0 | { |
591 | | // y grows, x constant |
592 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs - xs), center, |
593 | 0 | LoadU(df, d + xs + xs), LoadU(df, d + xs3 + xs)); |
594 | 0 | retval = MulAdd(sum, sum, retval); |
595 | 0 | } |
596 | 0 | { |
597 | | // both grow |
598 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), center, |
599 | 0 | LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); |
600 | 0 | retval = MulAdd(sum, sum, retval); |
601 | 0 | } |
602 | 0 | { |
603 | | // y grows, x shrinks |
604 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), center, |
605 | 0 | LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); |
606 | 0 | retval = MulAdd(sum, sum, retval); |
607 | 0 | } |
608 | 0 | { |
609 | | // y grows -4 to 4, x shrinks 1 -> -1 |
610 | 0 | auto sum = |
611 | 0 | Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs - xs + 1), center, |
612 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 1)); |
613 | 0 | retval = MulAdd(sum, sum, retval); |
614 | 0 | } |
615 | 0 | { |
616 | | // y grows -4 to 4, x grows -1 -> 1 |
617 | 0 | auto sum = |
618 | 0 | Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs - xs - 1), center, |
619 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 1)); |
620 | 0 | retval = MulAdd(sum, sum, retval); |
621 | 0 | } |
622 | 0 | { |
623 | | // x grows -4 to 4, y grows -1 to 1 |
624 | 0 | auto sum = Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 2 - xs), center, |
625 | 0 | LoadU(df, d + 2 + xs), LoadU(df, d + 4 + xs)); |
626 | 0 | retval = MulAdd(sum, sum, retval); |
627 | 0 | } |
628 | 0 | { |
629 | | // x grows -4 to 4, y shrinks 1 to -1 |
630 | 0 | auto sum = Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 2 + xs), center, |
631 | 0 | LoadU(df, d + 2 - xs), LoadU(df, d + 4 - xs)); |
632 | 0 | retval = MulAdd(sum, sum, retval); |
633 | 0 | } |
634 | 0 | { |
635 | | /* 0_________ |
636 | | 1__*______ |
637 | | 2___*_____ |
638 | | 3_________ |
639 | | 4____0____ |
640 | | 5_________ |
641 | | 6_____*___ |
642 | | 7______*__ |
643 | | 8_________ */ |
644 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), center, |
645 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); |
646 | 0 | retval = MulAdd(sum, sum, retval); |
647 | 0 | } |
648 | 0 | { |
649 | | /* 0_________ |
650 | | 1______*__ |
651 | | 2_____*___ |
652 | | 3_________ |
653 | | 4____0____ |
654 | | 5_________ |
655 | | 6___*_____ |
656 | | 7__*______ |
657 | | 8_________ */ |
658 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), center, |
659 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); |
660 | 0 | retval = MulAdd(sum, sum, retval); |
661 | 0 | } |
662 | 0 | { |
663 | | /* 0_________ |
664 | | 1_________ |
665 | | 2_*_______ |
666 | | 3__*______ |
667 | | 4____0____ |
668 | | 5______*__ |
669 | | 6_______*_ |
670 | | 7_________ |
671 | | 8_________ */ |
672 | 0 | auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), center, |
673 | 0 | LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); |
674 | 0 | retval = MulAdd(sum, sum, retval); |
675 | 0 | } |
676 | 0 | { |
677 | | /* 0_________ |
678 | | 1_________ |
679 | | 2_______*_ |
680 | | 3______*__ |
681 | | 4____0____ |
682 | | 5__*______ |
683 | | 6_*_______ |
684 | | 7_________ |
685 | | 8_________ */ |
686 | 0 | auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), center, |
687 | 0 | LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); |
688 | 0 | retval = MulAdd(sum, sum, retval); |
689 | 0 | } |
690 | 0 | { |
691 | | /* 0_________ |
692 | | 1_________ |
693 | | 2________* |
694 | | 3______*__ |
695 | | 4____0____ |
696 | | 5__*______ |
697 | | 6*________ |
698 | | 7_________ |
699 | | 8_________ */ |
700 | |
|
701 | 0 | auto sum = Sum(LoadU(df, d + xs + xs - 4), LoadU(df, d + xs - 2), center, |
702 | 0 | LoadU(df, d - xs + 2), LoadU(df, d - xs - xs + 4)); |
703 | 0 | retval = MulAdd(sum, sum, retval); |
704 | 0 | } |
705 | 0 | { |
706 | | /* 0_________ |
707 | | 1_________ |
708 | | 2*________ |
709 | | 3__*______ |
710 | | 4____0____ |
711 | | 5______*__ |
712 | | 6________* |
713 | | 7_________ |
714 | | 8_________ */ |
715 | 0 | auto sum = Sum(LoadU(df, d - xs - xs - 4), LoadU(df, d - xs - 2), center, |
716 | 0 | LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 4)); |
717 | 0 | retval = MulAdd(sum, sum, retval); |
718 | 0 | } |
719 | 0 | { |
720 | | /* 0__*______ |
721 | | 1_________ |
722 | | 2___*_____ |
723 | | 3_________ |
724 | | 4____0____ |
725 | | 5_________ |
726 | | 6_____*___ |
727 | | 7_________ |
728 | | 8______*__ */ |
729 | 0 | auto sum = |
730 | 0 | Sum(LoadU(df, d - xs3 - xs - 2), LoadU(df, d - xs - xs - 1), center, |
731 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 2)); |
732 | 0 | retval = MulAdd(sum, sum, retval); |
733 | 0 | } |
734 | 0 | { |
735 | | /* 0______*__ |
736 | | 1_________ |
737 | | 2_____*___ |
738 | | 3_________ |
739 | | 4____0____ |
740 | | 5_________ |
741 | | 6___*_____ |
742 | | 7_________ |
743 | | 8__*______ */ |
744 | 0 | auto sum = |
745 | 0 | Sum(LoadU(df, d - xs3 - xs + 2), LoadU(df, d - xs - xs + 1), center, |
746 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 2)); |
747 | 0 | retval = MulAdd(sum, sum, retval); |
748 | 0 | } |
749 | 0 | return retval; |
750 | 0 | } Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE4::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 4ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE4::Simd<float, 4ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_AVX2::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 8ul, 0> >(jxl::MaltaTagLF, hwy::N_AVX2::Simd<float, 8ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::MaltaUnit<hwy::N_SSE2::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE2::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::MaltaUnit<hwy::N_SSE2::Simd<float, 4ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE2::Simd<float, 4ul, 0>, float const*, long) |
751 | | |
752 | | template <class D> |
753 | | Vec<D> MaltaUnit(MaltaTag /*tag*/, const D df, |
754 | 0 | const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { |
755 | 0 | const intptr_t xs3 = 3 * xs; |
756 | |
|
757 | 0 | const auto center = LoadU(df, d); |
758 | | |
759 | | // x grows, y constant |
760 | 0 | const auto sum_yconst = |
761 | 0 | Sum(LoadU(df, d - 4), LoadU(df, d - 3), LoadU(df, d - 2), |
762 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2), |
763 | 0 | LoadU(df, d + 3), LoadU(df, d + 4)); |
764 | | // Will return this, sum of all line kernels |
765 | 0 | auto retval = Mul(sum_yconst, sum_yconst); |
766 | |
|
767 | 0 | { |
768 | | // y grows, x constant |
769 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs3), |
770 | 0 | LoadU(df, d - xs - xs), LoadU(df, d - xs), center, |
771 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs), |
772 | 0 | LoadU(df, d + xs3), LoadU(df, d + xs3 + xs)); |
773 | 0 | retval = MulAdd(sum, sum, retval); |
774 | 0 | } |
775 | 0 | { |
776 | | // both grow |
777 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), |
778 | 0 | LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), |
779 | 0 | LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); |
780 | 0 | retval = MulAdd(sum, sum, retval); |
781 | 0 | } |
782 | 0 | { |
783 | | // y grows, x shrinks |
784 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), |
785 | 0 | LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), |
786 | 0 | LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); |
787 | 0 | retval = MulAdd(sum, sum, retval); |
788 | 0 | } |
789 | 0 | { |
790 | | // y grows -4 to 4, x shrinks 1 -> -1 |
791 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), |
792 | 0 | LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, |
793 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), |
794 | 0 | LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); |
795 | 0 | retval = MulAdd(sum, sum, retval); |
796 | 0 | } |
797 | 0 | { |
798 | | // y grows -4 to 4, x grows -1 -> 1 |
799 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), |
800 | 0 | LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, |
801 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), |
802 | 0 | LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); |
803 | 0 | retval = MulAdd(sum, sum, retval); |
804 | 0 | } |
805 | 0 | { |
806 | | // x grows -4 to 4, y grows -1 to 1 |
807 | 0 | auto sum = |
808 | 0 | Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 3 - xs), LoadU(df, d - 2 - xs), |
809 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 + xs), |
810 | 0 | LoadU(df, d + 3 + xs), LoadU(df, d + 4 + xs)); |
811 | 0 | retval = MulAdd(sum, sum, retval); |
812 | 0 | } |
813 | 0 | { |
814 | | // x grows -4 to 4, y shrinks 1 to -1 |
815 | 0 | auto sum = |
816 | 0 | Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 3 + xs), LoadU(df, d - 2 + xs), |
817 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 - xs), |
818 | 0 | LoadU(df, d + 3 - xs), LoadU(df, d + 4 - xs)); |
819 | 0 | retval = MulAdd(sum, sum, retval); |
820 | 0 | } |
821 | 0 | { |
822 | | /* 0_________ |
823 | | 1__*______ |
824 | | 2___*_____ |
825 | | 3___*_____ |
826 | | 4____0____ |
827 | | 5_____*___ |
828 | | 6_____*___ |
829 | | 7______*__ |
830 | | 8_________ */ |
831 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), |
832 | 0 | LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), |
833 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); |
834 | 0 | retval = MulAdd(sum, sum, retval); |
835 | 0 | } |
836 | 0 | { |
837 | | /* 0_________ |
838 | | 1______*__ |
839 | | 2_____*___ |
840 | | 3_____*___ |
841 | | 4____0____ |
842 | | 5___*_____ |
843 | | 6___*_____ |
844 | | 7__*______ |
845 | | 8_________ */ |
846 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), |
847 | 0 | LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), |
848 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); |
849 | 0 | retval = MulAdd(sum, sum, retval); |
850 | 0 | } |
851 | 0 | { |
852 | | /* 0_________ |
853 | | 1_________ |
854 | | 2_*_______ |
855 | | 3__**_____ |
856 | | 4____0____ |
857 | | 5_____**__ |
858 | | 6_______*_ |
859 | | 7_________ |
860 | | 8_________ */ |
861 | 0 | auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), |
862 | 0 | LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), |
863 | 0 | LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); |
864 | 0 | retval = MulAdd(sum, sum, retval); |
865 | 0 | } |
866 | 0 | { |
867 | | /* 0_________ |
868 | | 1_________ |
869 | | 2_______*_ |
870 | | 3_____**__ |
871 | | 4____0____ |
872 | | 5__**_____ |
873 | | 6_*_______ |
874 | | 7_________ |
875 | | 8_________ */ |
876 | 0 | auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), |
877 | 0 | LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), |
878 | 0 | LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); |
879 | 0 | retval = MulAdd(sum, sum, retval); |
880 | 0 | } |
881 | 0 | { |
882 | | /* 0_________ |
883 | | 1_________ |
884 | | 2_________ |
885 | | 3______*** |
886 | | 4___*0*___ |
887 | | 5***______ |
888 | | 6_________ |
889 | | 7_________ |
890 | | 8_________ */ |
891 | |
|
892 | 0 | auto sum = |
893 | 0 | Sum(LoadU(df, d + xs - 4), LoadU(df, d + xs - 3), LoadU(df, d + xs - 2), |
894 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d - xs + 2), |
895 | 0 | LoadU(df, d - xs + 3), LoadU(df, d - xs + 4)); |
896 | 0 | retval = MulAdd(sum, sum, retval); |
897 | 0 | } |
898 | 0 | { |
899 | | /* 0_________ |
900 | | 1_________ |
901 | | 2_________ |
902 | | 3***______ |
903 | | 4___*0*___ |
904 | | 5______*** |
905 | | 6_________ |
906 | | 7_________ |
907 | | 8_________ */ |
908 | 0 | auto sum = |
909 | 0 | Sum(LoadU(df, d - xs - 4), LoadU(df, d - xs - 3), LoadU(df, d - xs - 2), |
910 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + xs + 2), |
911 | 0 | LoadU(df, d + xs + 3), LoadU(df, d + xs + 4)); |
912 | 0 | retval = MulAdd(sum, sum, retval); |
913 | 0 | } |
914 | 0 | { |
915 | | /* 0___*_____ |
916 | | 1___*_____ |
917 | | 2___*_____ |
918 | | 3____*____ |
919 | | 4____0____ |
920 | | 5____*____ |
921 | | 6_____*___ |
922 | | 7_____*___ |
923 | | 8_____*___ */ |
924 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), |
925 | 0 | LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, |
926 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), |
927 | 0 | LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); |
928 | 0 | retval = MulAdd(sum, sum, retval); |
929 | 0 | } |
930 | 0 | { |
931 | | /* 0_____*___ |
932 | | 1_____*___ |
933 | | 2____ *___ |
934 | | 3____*____ |
935 | | 4____0____ |
936 | | 5____*____ |
937 | | 6___*_____ |
938 | | 7___*_____ |
939 | | 8___*_____ */ |
940 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), |
941 | 0 | LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, |
942 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), |
943 | 0 | LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); |
944 | 0 | retval = MulAdd(sum, sum, retval); |
945 | 0 | } |
946 | 0 | return retval; |
947 | 0 | } Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_SSE4::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 4ul, 0> >(jxl::MaltaTag, hwy::N_SSE4::Simd<float, 4ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_AVX2::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 8ul, 0> >(jxl::MaltaTag, hwy::N_AVX2::Simd<float, 8ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::MaltaUnit<hwy::N_SSE2::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_SSE2::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::MaltaUnit<hwy::N_SSE2::Simd<float, 4ul, 0> >(jxl::MaltaTag, hwy::N_SSE2::Simd<float, 4ul, 0>, float const*, long) |
948 | | |
949 | | // Returns MaltaUnit. Avoids bounds-checks when x0 and y0 are known |
950 | | // to be far enough from the image borders. "diffs" is a packed image. |
951 | | template <class Tag> |
952 | | static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs, |
953 | | const size_t x0, |
954 | 0 | const size_t y0) { |
955 | 0 | const float* BUTTERAUGLI_RESTRICT d = diffs.ConstRow(y0) + x0; |
956 | 0 | const HWY_CAPPED(float, 1) df; |
957 | 0 | if ((x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) && |
958 | 0 | y0 < (diffs.ysize() - 4))) { |
959 | 0 | return GetLane(MaltaUnit(Tag(), df, d, diffs.PixelsPerRow())); |
960 | 0 | } |
961 | | |
962 | 0 | float borderimage[12 * 9]; // round up to 4 |
963 | 0 | for (int dy = 0; dy < 9; ++dy) { |
964 | 0 | int y = y0 + dy - 4; |
965 | 0 | if (y < 0 || static_cast<size_t>(y) >= diffs.ysize()) { |
966 | 0 | for (int dx = 0; dx < 12; ++dx) { |
967 | 0 | borderimage[dy * 12 + dx] = 0.0f; |
968 | 0 | } |
969 | 0 | continue; |
970 | 0 | } |
971 | | |
972 | 0 | const float* row_diffs = diffs.ConstRow(y); |
973 | 0 | for (int dx = 0; dx < 9; ++dx) { |
974 | 0 | int x = x0 + dx - 4; |
975 | 0 | if (x < 0 || static_cast<size_t>(x) >= diffs.xsize()) { |
976 | 0 | borderimage[dy * 12 + dx] = 0.0f; |
977 | 0 | } else { |
978 | 0 | borderimage[dy * 12 + dx] = row_diffs[x]; |
979 | 0 | } |
980 | 0 | } |
981 | 0 | std::fill(borderimage + dy * 12 + 9, borderimage + dy * 12 + 12, 0.0f); |
982 | 0 | } |
983 | 0 | return GetLane(MaltaUnit(Tag(), df, &borderimage[4 * 12 + 4], 12)); |
984 | 0 | } Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE4::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE4::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_AVX2::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_AVX2::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE2::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE2::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long) |
985 | | |
986 | | template <class Tag> |
987 | | static Status MaltaDiffMapT(const Tag tag, const ImageF& lum0, |
988 | | const ImageF& lum1, const double w_0gt1, |
989 | | const double w_0lt1, const double norm1, |
990 | | const double len, const double mulli, |
991 | | ImageF* HWY_RESTRICT diffs, |
992 | 0 | ImageF* HWY_RESTRICT block_diff_ac) { |
993 | 0 | JXL_ENSURE(SameSize(lum0, lum1) && SameSize(lum0, *diffs)); |
994 | 0 | const size_t xsize_ = lum0.xsize(); |
995 | 0 | const size_t ysize_ = lum0.ysize(); |
996 | |
|
997 | 0 | const float kWeight0 = 0.5; |
998 | 0 | const float kWeight1 = 0.33; |
999 | |
|
1000 | 0 | const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1); |
1001 | 0 | const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1); |
1002 | 0 | const float norm2_0gt1 = w_pre0gt1 * norm1; |
1003 | 0 | const float norm2_0lt1 = w_pre0lt1 * norm1; |
1004 | |
|
1005 | 0 | for (size_t y = 0; y < ysize_; ++y) { |
1006 | 0 | const float* HWY_RESTRICT row0 = lum0.ConstRow(y); |
1007 | 0 | const float* HWY_RESTRICT row1 = lum1.ConstRow(y); |
1008 | 0 | float* HWY_RESTRICT row_diffs = diffs->Row(y); |
1009 | 0 | for (size_t x = 0; x < xsize_; ++x) { |
1010 | 0 | const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x])); |
1011 | 0 | const float diff = row0[x] - row1[x]; |
1012 | 0 | const float scaler = norm2_0gt1 / (static_cast<float>(norm1) + absval); |
1013 | | |
1014 | | // Primary symmetric quadratic objective. |
1015 | 0 | row_diffs[x] = scaler * diff; |
1016 | |
|
1017 | 0 | const float scaler2 = norm2_0lt1 / (static_cast<float>(norm1) + absval); |
1018 | 0 | const double fabs0 = std::fabs(row0[x]); |
1019 | | |
1020 | | // Secondary half-open quadratic objectives. |
1021 | 0 | const double too_small = 0.55 * fabs0; |
1022 | 0 | const double too_big = 1.05 * fabs0; |
1023 | |
|
1024 | 0 | if (row0[x] < 0) { |
1025 | 0 | if (row1[x] > -too_small) { |
1026 | 0 | double impact = scaler2 * (row1[x] + too_small); |
1027 | 0 | row_diffs[x] -= impact; |
1028 | 0 | } else if (row1[x] < -too_big) { |
1029 | 0 | double impact = scaler2 * (-row1[x] - too_big); |
1030 | 0 | row_diffs[x] += impact; |
1031 | 0 | } |
1032 | 0 | } else { |
1033 | 0 | if (row1[x] < too_small) { |
1034 | 0 | double impact = scaler2 * (too_small - row1[x]); |
1035 | 0 | row_diffs[x] += impact; |
1036 | 0 | } else if (row1[x] > too_big) { |
1037 | 0 | double impact = scaler2 * (row1[x] - too_big); |
1038 | 0 | row_diffs[x] -= impact; |
1039 | 0 | } |
1040 | 0 | } |
1041 | 0 | } |
1042 | 0 | } |
1043 | |
|
1044 | 0 | size_t y0 = 0; |
1045 | | // Top |
1046 | 0 | for (; y0 < 4; ++y0) { |
1047 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); |
1048 | 0 | for (size_t x0 = 0; x0 < xsize_; ++x0) { |
1049 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1050 | 0 | } |
1051 | 0 | } |
1052 | |
|
1053 | 0 | const HWY_FULL(float) df; |
1054 | 0 | const size_t aligned_x = std::max(static_cast<size_t>(4), Lanes(df)); |
1055 | 0 | const intptr_t stride = diffs->PixelsPerRow(); |
1056 | | |
1057 | | // Middle |
1058 | 0 | for (; y0 < ysize_ - 4; ++y0) { |
1059 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y0); |
1060 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); |
1061 | 0 | size_t x0 = 0; |
1062 | 0 | for (; x0 < aligned_x; ++x0) { |
1063 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1064 | 0 | } |
1065 | 0 | for (; x0 + Lanes(df) + 4 <= xsize_; x0 += Lanes(df)) { |
1066 | 0 | auto diff = Load(df, row_diff + x0); |
1067 | 0 | diff = Add(diff, MaltaUnit(Tag(), df, row_in + x0, stride)); |
1068 | 0 | Store(diff, df, row_diff + x0); |
1069 | 0 | } |
1070 | |
|
1071 | 0 | for (; x0 < xsize_; ++x0) { |
1072 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1073 | 0 | } |
1074 | 0 | } |
1075 | | |
1076 | | // Bottom |
1077 | 0 | for (; y0 < ysize_; ++y0) { |
1078 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); |
1079 | 0 | for (size_t x0 = 0; x0 < xsize_; ++x0) { |
1080 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1081 | 0 | } |
1082 | 0 | } |
1083 | 0 | return true; |
1084 | 0 | } Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SSE4::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SSE4::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_AVX2::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_AVX2::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SSE2::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SSE2::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) |
1085 | | |
1086 | | // Need non-template wrapper functions for HWY_EXPORT. |
1087 | | Status MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, |
1088 | | const double w_0lt1, const double norm1, |
1089 | | ImageF* HWY_RESTRICT diffs, |
1090 | 0 | ImageF* HWY_RESTRICT block_diff_ac) { |
1091 | 0 | const double len = 3.75; |
1092 | 0 | static const double mulli = 0.39905817637; |
1093 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1, |
1094 | 0 | norm1, len, mulli, diffs, block_diff_ac)); |
1095 | 0 | return true; |
1096 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) |
1097 | | |
1098 | | Status MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, |
1099 | | const double w_0gt1, const double w_0lt1, |
1100 | | const double norm1, ImageF* HWY_RESTRICT diffs, |
1101 | 0 | ImageF* HWY_RESTRICT block_diff_ac) { |
1102 | 0 | const double len = 3.75; |
1103 | 0 | static const double mulli = 0.611612573796; |
1104 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1, |
1105 | 0 | norm1, len, mulli, diffs, block_diff_ac)); |
1106 | 0 | return true; |
1107 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) |
1108 | | |
1109 | | void CombineChannelsForMasking(const ImageF* hf, const ImageF* uhf, |
1110 | 0 | ImageF* out) { |
1111 | | // Only X and Y components are involved in masking. B's influence |
1112 | | // is considered less important in the high frequency area, and we |
1113 | | // don't model masking from lower frequency signals. |
1114 | 0 | static const float muls[3] = { |
1115 | 0 | 2.5f, |
1116 | 0 | 0.4f, |
1117 | 0 | 0.4f, |
1118 | 0 | }; |
1119 | | // Silly and unoptimized approach here. TODO(jyrki): rework this. |
1120 | 0 | for (size_t y = 0; y < hf[0].ysize(); ++y) { |
1121 | 0 | const float* BUTTERAUGLI_RESTRICT row_y_hf = hf[1].Row(y); |
1122 | 0 | const float* BUTTERAUGLI_RESTRICT row_y_uhf = uhf[1].Row(y); |
1123 | 0 | const float* BUTTERAUGLI_RESTRICT row_x_hf = hf[0].Row(y); |
1124 | 0 | const float* BUTTERAUGLI_RESTRICT row_x_uhf = uhf[0].Row(y); |
1125 | 0 | float* BUTTERAUGLI_RESTRICT row = out->Row(y); |
1126 | 0 | for (size_t x = 0; x < hf[0].xsize(); ++x) { |
1127 | 0 | float xdiff = (row_x_uhf[x] + row_x_hf[x]) * muls[0]; |
1128 | 0 | float ydiff = row_y_uhf[x] * muls[1] + row_y_hf[x] * muls[2]; |
1129 | 0 | row[x] = xdiff * xdiff + ydiff * ydiff; |
1130 | 0 | row[x] = std::sqrt(row[x]); |
1131 | 0 | } |
1132 | 0 | } |
1133 | 0 | } Unexecuted instantiation: jxl::N_SSE4::CombineChannelsForMasking(jxl::Plane<float> const*, jxl::Plane<float> const*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::CombineChannelsForMasking(jxl::Plane<float> const*, jxl::Plane<float> const*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::CombineChannelsForMasking(jxl::Plane<float> const*, jxl::Plane<float> const*, jxl::Plane<float>*) |
1134 | | |
1135 | 0 | void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) { |
1136 | 0 | const size_t xsize = xyb.xsize(); |
1137 | 0 | const size_t ysize = xyb.ysize(); |
1138 | 0 | const float bias = mul * bias_arg; |
1139 | 0 | const float sqrt_bias = std::sqrt(bias); |
1140 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1141 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = xyb.Row(y); |
1142 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(y); |
1143 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1144 | | // kBias makes sqrt behave more linearly. |
1145 | 0 | row_out[x] = std::sqrt(mul * std::abs(row_in[x]) + bias) - sqrt_bias; |
1146 | 0 | } |
1147 | 0 | } |
1148 | 0 | } Unexecuted instantiation: jxl::N_SSE4::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*) |
1149 | | |
1150 | | // std::log(80.0) / std::log(255.0); |
1151 | | constexpr float kIntensityTargetNormalizationHack = 0.79079917404f; |
1152 | | static const float kInternalGoodQualityThreshold = |
1153 | | 17.83f * kIntensityTargetNormalizationHack; |
1154 | | static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold; |
1155 | | |
1156 | 0 | void StoreMin3(const float v, float& min0, float& min1, float& min2) { |
1157 | 0 | if (v < min2) { |
1158 | 0 | if (v < min0) { |
1159 | 0 | min2 = min1; |
1160 | 0 | min1 = min0; |
1161 | 0 | min0 = v; |
1162 | 0 | } else if (v < min1) { |
1163 | 0 | min2 = min1; |
1164 | 0 | min1 = v; |
1165 | 0 | } else { |
1166 | 0 | min2 = v; |
1167 | 0 | } |
1168 | 0 | } |
1169 | 0 | } Unexecuted instantiation: jxl::N_SSE4::StoreMin3(float, float&, float&, float&) Unexecuted instantiation: jxl::N_AVX2::StoreMin3(float, float&, float&, float&) Unexecuted instantiation: jxl::N_SSE2::StoreMin3(float, float&, float&, float&) |
1170 | | |
1171 | | // Look for smooth areas near the area of degradation. |
1172 | | // If the areas area generally smooth, don't do masking. |
1173 | 0 | void FuzzyErosion(const ImageF& from, ImageF* to) { |
1174 | 0 | const size_t xsize = from.xsize(); |
1175 | 0 | const size_t ysize = from.ysize(); |
1176 | 0 | static const int kStep = 3; |
1177 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1178 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1179 | 0 | float min0 = from.Row(y)[x]; |
1180 | 0 | float min1 = 2 * min0; |
1181 | 0 | float min2 = min1; |
1182 | 0 | if (x >= kStep) { |
1183 | 0 | StoreMin3(from.Row(y)[x - kStep], min0, min1, min2); |
1184 | 0 | if (y >= kStep) { |
1185 | 0 | StoreMin3(from.Row(y - kStep)[x - kStep], min0, min1, min2); |
1186 | 0 | } |
1187 | 0 | if (y < ysize - kStep) { |
1188 | 0 | StoreMin3(from.Row(y + kStep)[x - kStep], min0, min1, min2); |
1189 | 0 | } |
1190 | 0 | } |
1191 | 0 | if (x < xsize - kStep) { |
1192 | 0 | StoreMin3(from.Row(y)[x + kStep], min0, min1, min2); |
1193 | 0 | if (y >= kStep) { |
1194 | 0 | StoreMin3(from.Row(y - kStep)[x + kStep], min0, min1, min2); |
1195 | 0 | } |
1196 | 0 | if (y < ysize - kStep) { |
1197 | 0 | StoreMin3(from.Row(y + kStep)[x + kStep], min0, min1, min2); |
1198 | 0 | } |
1199 | 0 | } |
1200 | 0 | if (y >= kStep) { |
1201 | 0 | StoreMin3(from.Row(y - kStep)[x], min0, min1, min2); |
1202 | 0 | } |
1203 | 0 | if (y < ysize - kStep) { |
1204 | 0 | StoreMin3(from.Row(y + kStep)[x], min0, min1, min2); |
1205 | 0 | } |
1206 | 0 | to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2); |
1207 | 0 | } |
1208 | 0 | } |
1209 | 0 | } Unexecuted instantiation: jxl::N_SSE4::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*) |
1210 | | |
1211 | | // Compute values of local frequency and dc masking based on the activity |
1212 | | // in the two images. img_diff_ac may be null. |
1213 | | Status Mask(const ImageF& mask0, const ImageF& mask1, |
1214 | | const ButteraugliParams& params, BlurTemp* blur_temp, |
1215 | | ImageF* BUTTERAUGLI_RESTRICT mask, |
1216 | 0 | ImageF* BUTTERAUGLI_RESTRICT diff_ac) { |
1217 | 0 | const size_t xsize = mask0.xsize(); |
1218 | 0 | const size_t ysize = mask0.ysize(); |
1219 | 0 | JxlMemoryManager* memory_manager = mask0.memory_manager(); |
1220 | 0 | JXL_ASSIGN_OR_RETURN(*mask, ImageF::Create(memory_manager, xsize, ysize)); |
1221 | 0 | static const float kMul = 6.19424080439; |
1222 | 0 | static const float kBias = 12.61050594197; |
1223 | 0 | static const float kRadius = 2.7; |
1224 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diff0, |
1225 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1226 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diff1, |
1227 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1228 | 0 | JXL_ASSIGN_OR_RETURN(ImageF blurred0, |
1229 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1230 | 0 | JXL_ASSIGN_OR_RETURN(ImageF blurred1, |
1231 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1232 | 0 | DiffPrecompute(mask0, kMul, kBias, &diff0); |
1233 | 0 | DiffPrecompute(mask1, kMul, kBias, &diff1); |
1234 | 0 | JXL_RETURN_IF_ERROR(Blur(diff0, kRadius, params, blur_temp, &blurred0)); |
1235 | 0 | FuzzyErosion(blurred0, &diff0); |
1236 | 0 | JXL_RETURN_IF_ERROR(Blur(diff1, kRadius, params, blur_temp, &blurred1)); |
1237 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1238 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1239 | 0 | mask->Row(y)[x] = diff0.Row(y)[x]; |
1240 | 0 | if (diff_ac != nullptr) { |
1241 | 0 | static const float kMaskToErrorMul = 10.0; |
1242 | 0 | float diff = blurred0.Row(y)[x] - blurred1.Row(y)[x]; |
1243 | 0 | diff_ac->Row(y)[x] += kMaskToErrorMul * diff * diff; |
1244 | 0 | } |
1245 | 0 | } |
1246 | 0 | } |
1247 | 0 | return true; |
1248 | 0 | } Unexecuted instantiation: jxl::N_SSE4::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) |
1249 | | |
1250 | | // `diff_ac` may be null. |
1251 | | Status MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1, |
1252 | | const size_t xsize, const size_t ysize, |
1253 | | const ButteraugliParams& params, BlurTemp* blur_temp, |
1254 | | ImageF* BUTTERAUGLI_RESTRICT mask, |
1255 | 0 | ImageF* BUTTERAUGLI_RESTRICT diff_ac) { |
1256 | 0 | JxlMemoryManager* memory_manager = pi0.hf[0].memory_manager(); |
1257 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask0, |
1258 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1259 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask1, |
1260 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1261 | 0 | CombineChannelsForMasking(&pi0.hf[0], &pi0.uhf[0], &mask0); |
1262 | 0 | CombineChannelsForMasking(&pi1.hf[0], &pi1.uhf[0], &mask1); |
1263 | 0 | JXL_RETURN_IF_ERROR(Mask(mask0, mask1, params, blur_temp, mask, diff_ac)); |
1264 | 0 | return true; |
1265 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) |
1266 | | |
1267 | 0 | double MaskY(double delta) { |
1268 | 0 | static const double offset = 0.829591754942; |
1269 | 0 | static const double scaler = 0.451936922203; |
1270 | 0 | static const double mul = 2.5485944793; |
1271 | 0 | const double c = mul / ((scaler * delta) + offset); |
1272 | 0 | const double retval = kGlobalScale * (1.0 + c); |
1273 | 0 | return retval * retval; |
1274 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaskY(double) Unexecuted instantiation: jxl::N_AVX2::MaskY(double) Unexecuted instantiation: jxl::N_SSE2::MaskY(double) |
1275 | | |
1276 | 0 | double MaskDcY(double delta) { |
1277 | 0 | static const double offset = 0.20025578522; |
1278 | 0 | static const double scaler = 3.87449418804; |
1279 | 0 | static const double mul = 0.505054525019; |
1280 | 0 | const double c = mul / ((scaler * delta) + offset); |
1281 | 0 | const double retval = kGlobalScale * (1.0 + c); |
1282 | 0 | return retval * retval; |
1283 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaskDcY(double) Unexecuted instantiation: jxl::N_AVX2::MaskDcY(double) Unexecuted instantiation: jxl::N_SSE2::MaskDcY(double) |
1284 | | |
1285 | 0 | inline float MaskColor(const float color[3], const float mask) { |
1286 | 0 | return color[0] * mask + color[1] * mask + color[2] * mask; |
1287 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaskColor(float const*, float) Unexecuted instantiation: jxl::N_AVX2::MaskColor(float const*, float) Unexecuted instantiation: jxl::N_SSE2::MaskColor(float const*, float) |
1288 | | |
1289 | | // Diffmap := sqrt of sum{diff images by multiplied by X and Y/B masks} |
1290 | | Status CombineChannelsToDiffmap(const ImageF& mask, |
1291 | | const Image3F& block_diff_dc, |
1292 | | const Image3F& block_diff_ac, float xmul, |
1293 | 0 | ImageF* result) { |
1294 | 0 | JXL_ENSURE(SameSize(mask, *result)); |
1295 | 0 | size_t xsize = mask.xsize(); |
1296 | 0 | size_t ysize = mask.ysize(); |
1297 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1298 | 0 | float* BUTTERAUGLI_RESTRICT row_out = result->Row(y); |
1299 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1300 | 0 | float val = mask.Row(y)[x]; |
1301 | 0 | float maskval = MaskY(val); |
1302 | 0 | float dc_maskval = MaskDcY(val); |
1303 | 0 | float diff_dc[3]; |
1304 | 0 | float diff_ac[3]; |
1305 | 0 | for (int i = 0; i < 3; ++i) { |
1306 | 0 | diff_dc[i] = block_diff_dc.PlaneRow(i, y)[x]; |
1307 | 0 | diff_ac[i] = block_diff_ac.PlaneRow(i, y)[x]; |
1308 | 0 | } |
1309 | 0 | diff_ac[0] *= xmul; |
1310 | 0 | diff_dc[0] *= xmul; |
1311 | 0 | row_out[x] = std::sqrt(MaskColor(diff_dc, dc_maskval) + |
1312 | 0 | MaskColor(diff_ac, maskval)); |
1313 | 0 | } |
1314 | 0 | } |
1315 | 0 | return true; |
1316 | 0 | } Unexecuted instantiation: jxl::N_SSE4::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_SSE2::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*) |
1317 | | |
1318 | | // Adds weighted L2 difference between i0 and i1 to diffmap. |
1319 | | static void L2Diff(const ImageF& i0, const ImageF& i1, const float w, |
1320 | 0 | ImageF* BUTTERAUGLI_RESTRICT diffmap) { |
1321 | 0 | if (w == 0) return; |
1322 | | |
1323 | 0 | const HWY_FULL(float) d; |
1324 | 0 | const auto weight = Set(d, w); |
1325 | |
|
1326 | 0 | for (size_t y = 0; y < i0.ysize(); ++y) { |
1327 | 0 | const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); |
1328 | 0 | const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); |
1329 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); |
1330 | |
|
1331 | 0 | for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { |
1332 | 0 | const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); |
1333 | 0 | const auto diff2 = Mul(diff, diff); |
1334 | 0 | const auto prev = Load(d, row_diff + x); |
1335 | 0 | Store(MulAdd(diff2, weight, prev), d, row_diff + x); |
1336 | 0 | } |
1337 | 0 | } |
1338 | 0 | } Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::N_SSE2::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*) |
1339 | | |
1340 | | // Initializes diffmap to the weighted L2 difference between i0 and i1. |
1341 | | static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w, |
1342 | 0 | ImageF* BUTTERAUGLI_RESTRICT diffmap) { |
1343 | 0 | if (w == 0) return; |
1344 | | |
1345 | 0 | const HWY_FULL(float) d; |
1346 | 0 | const auto weight = Set(d, w); |
1347 | |
|
1348 | 0 | for (size_t y = 0; y < i0.ysize(); ++y) { |
1349 | 0 | const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); |
1350 | 0 | const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); |
1351 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); |
1352 | |
|
1353 | 0 | for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { |
1354 | 0 | const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); |
1355 | 0 | const auto diff2 = Mul(diff, diff); |
1356 | 0 | Store(Mul(diff2, weight), d, row_diff + x); |
1357 | 0 | } |
1358 | 0 | } |
1359 | 0 | } Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::N_SSE2::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*) |
1360 | | |
1361 | | // i0 is the original image. |
1362 | | // i1 is the deformed copy. |
1363 | | static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1, |
1364 | | float w_0lt1, |
1365 | 0 | ImageF* BUTTERAUGLI_RESTRICT diffmap) { |
1366 | 0 | if (w_0gt1 == 0 && w_0lt1 == 0) { |
1367 | 0 | return; |
1368 | 0 | } |
1369 | | |
1370 | 0 | const HWY_FULL(float) d; |
1371 | 0 | const auto vw_0gt1 = Set(d, w_0gt1 * 0.8); |
1372 | 0 | const auto vw_0lt1 = Set(d, w_0lt1 * 0.8); |
1373 | |
|
1374 | 0 | for (size_t y = 0; y < i0.ysize(); ++y) { |
1375 | 0 | const float* BUTTERAUGLI_RESTRICT row0 = i0.Row(y); |
1376 | 0 | const float* BUTTERAUGLI_RESTRICT row1 = i1.Row(y); |
1377 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); |
1378 | |
|
1379 | 0 | for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { |
1380 | 0 | const auto val0 = Load(d, row0 + x); |
1381 | 0 | const auto val1 = Load(d, row1 + x); |
1382 | | |
1383 | | // Primary symmetric quadratic objective. |
1384 | 0 | const auto diff = Sub(val0, val1); |
1385 | 0 | auto total = MulAdd(Mul(diff, diff), vw_0gt1, Load(d, row_diff + x)); |
1386 | | |
1387 | | // Secondary half-open quadratic objectives. |
1388 | 0 | const auto fabs0 = Abs(val0); |
1389 | 0 | const auto too_small = Mul(Set(d, 0.4), fabs0); |
1390 | 0 | const auto too_big = fabs0; |
1391 | |
|
1392 | 0 | const auto if_neg = IfThenElse( |
1393 | 0 | Gt(val1, Neg(too_small)), Add(val1, too_small), |
1394 | 0 | IfThenElseZero(Lt(val1, Neg(too_big)), Sub(Neg(val1), too_big))); |
1395 | 0 | const auto if_pos = |
1396 | 0 | IfThenElse(Lt(val1, too_small), Sub(too_small, val1), |
1397 | 0 | IfThenElseZero(Gt(val1, too_big), Sub(val1, too_big))); |
1398 | 0 | const auto v = IfThenElse(Lt(val0, Zero(d)), if_neg, if_pos); |
1399 | 0 | total = MulAdd(vw_0lt1, Mul(v, v), total); |
1400 | 0 | Store(total, d, row_diff + x); |
1401 | 0 | } |
1402 | 0 | } |
1403 | 0 | } Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::N_SSE2::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Plane<float>*) |
1404 | | |
1405 | | // A simple HDR compatible gamma function. |
1406 | | template <class DF, class V> |
1407 | 0 | V Gamma(const DF df, V v) { |
1408 | | // ln(2) constant folded in because we want std::log but have FastLog2f. |
1409 | 0 | const auto kRetMul = Set(df, 19.245013259874995f * kInvLog2e); |
1410 | 0 | const auto kRetAdd = Set(df, -23.16046239805755); |
1411 | | // This should happen rarely, but may lead to a NaN in log, which is |
1412 | | // undesirable. Since negative photons don't exist we solve the NaNs by |
1413 | | // clamping here. |
1414 | 0 | v = ZeroIfNegative(v); |
1415 | |
|
1416 | 0 | const auto biased = Add(v, Set(df, 9.9710635769299145)); |
1417 | 0 | const auto log = FastLog2f(df, biased); |
1418 | | // We could fold this into a custom Log2 polynomial, but there would be |
1419 | | // relatively little gain. |
1420 | 0 | return MulAdd(kRetMul, log, kRetAdd); |
1421 | 0 | } Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::Gamma<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::Gamma<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::Gamma<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>) |
1422 | | |
1423 | | template <bool Clamp, class DF, class V> |
1424 | | BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1, |
1425 | | const V& in2, V* JXL_RESTRICT out0, |
1426 | | V* JXL_RESTRICT out1, |
1427 | 0 | V* JXL_RESTRICT out2) { |
1428 | | // https://en.wikipedia.org/wiki/Photopsin absorbance modeling. |
1429 | 0 | static const double mixi0 = 0.29956550340058319; |
1430 | 0 | static const double mixi1 = 0.63373087833825936; |
1431 | 0 | static const double mixi2 = 0.077705617820981968; |
1432 | 0 | static const double mixi3 = 1.7557483643287353; |
1433 | 0 | static const double mixi4 = 0.22158691104574774; |
1434 | 0 | static const double mixi5 = 0.69391388044116142; |
1435 | 0 | static const double mixi6 = 0.0987313588422; |
1436 | 0 | static const double mixi7 = 1.7557483643287353; |
1437 | 0 | static const double mixi8 = 0.02; |
1438 | 0 | static const double mixi9 = 0.02; |
1439 | 0 | static const double mixi10 = 0.20480129041026129; |
1440 | 0 | static const double mixi11 = 12.226454707163354; |
1441 | |
|
1442 | 0 | const V mix0 = Set(df, mixi0); |
1443 | 0 | const V mix1 = Set(df, mixi1); |
1444 | 0 | const V mix2 = Set(df, mixi2); |
1445 | 0 | const V mix3 = Set(df, mixi3); |
1446 | 0 | const V mix4 = Set(df, mixi4); |
1447 | 0 | const V mix5 = Set(df, mixi5); |
1448 | 0 | const V mix6 = Set(df, mixi6); |
1449 | 0 | const V mix7 = Set(df, mixi7); |
1450 | 0 | const V mix8 = Set(df, mixi8); |
1451 | 0 | const V mix9 = Set(df, mixi9); |
1452 | 0 | const V mix10 = Set(df, mixi10); |
1453 | 0 | const V mix11 = Set(df, mixi11); |
1454 | |
|
1455 | 0 | *out0 = MulAdd(mix0, in0, MulAdd(mix1, in1, MulAdd(mix2, in2, mix3))); |
1456 | 0 | *out1 = MulAdd(mix4, in0, MulAdd(mix5, in1, MulAdd(mix6, in2, mix7))); |
1457 | 0 | *out2 = MulAdd(mix8, in0, MulAdd(mix9, in1, MulAdd(mix10, in2, mix11))); |
1458 | |
|
1459 | 0 | if (Clamp) { |
1460 | 0 | *out0 = Max(*out0, mix3); |
1461 | 0 | *out1 = Max(*out1, mix7); |
1462 | 0 | *out2 = Max(*out2, mix11); |
1463 | 0 | } |
1464 | 0 | } Unexecuted instantiation: void jxl::N_SSE4::OpsinAbsorbance<true, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*) Unexecuted instantiation: void jxl::N_SSE4::OpsinAbsorbance<false, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*) Unexecuted instantiation: void jxl::N_AVX2::OpsinAbsorbance<true, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*) Unexecuted instantiation: void jxl::N_AVX2::OpsinAbsorbance<false, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*) Unexecuted instantiation: void jxl::N_SSE2::OpsinAbsorbance<true, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*) Unexecuted instantiation: void jxl::N_SSE2::OpsinAbsorbance<false, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*) |
1465 | | |
1466 | | // `blurred` is a temporary image used inside this function and not returned. |
1467 | | Status OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params, |
1468 | 0 | Image3F* blurred, BlurTemp* blur_temp, Image3F* xyb) { |
1469 | 0 | JXL_ENSURE(blurred != nullptr); |
1470 | 0 | const double kSigma = 1.2; |
1471 | 0 | JXL_RETURN_IF_ERROR( |
1472 | 0 | Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0))); |
1473 | 0 | JXL_RETURN_IF_ERROR( |
1474 | 0 | Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1))); |
1475 | 0 | JXL_RETURN_IF_ERROR( |
1476 | 0 | Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2))); |
1477 | 0 | const HWY_FULL(float) df; |
1478 | 0 | const auto intensity_target_multiplier = Set(df, params.intensity_target); |
1479 | 0 | for (size_t y = 0; y < rgb.ysize(); ++y) { |
1480 | 0 | const float* row_r = rgb.ConstPlaneRow(0, y); |
1481 | 0 | const float* row_g = rgb.ConstPlaneRow(1, y); |
1482 | 0 | const float* row_b = rgb.ConstPlaneRow(2, y); |
1483 | 0 | const float* row_blurred_r = blurred->ConstPlaneRow(0, y); |
1484 | 0 | const float* row_blurred_g = blurred->ConstPlaneRow(1, y); |
1485 | 0 | const float* row_blurred_b = blurred->ConstPlaneRow(2, y); |
1486 | 0 | float* row_out_x = xyb->PlaneRow(0, y); |
1487 | 0 | float* row_out_y = xyb->PlaneRow(1, y); |
1488 | 0 | float* row_out_b = xyb->PlaneRow(2, y); |
1489 | 0 | const auto min = Set(df, 1e-4f); |
1490 | 0 | for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) { |
1491 | 0 | auto sensitivity0 = Undefined(df); |
1492 | 0 | auto sensitivity1 = Undefined(df); |
1493 | 0 | auto sensitivity2 = Undefined(df); |
1494 | 0 | { |
1495 | | // Calculate sensitivity based on the smoothed image gamma derivative. |
1496 | 0 | auto pre_mixed0 = Undefined(df); |
1497 | 0 | auto pre_mixed1 = Undefined(df); |
1498 | 0 | auto pre_mixed2 = Undefined(df); |
1499 | 0 | OpsinAbsorbance<true>( |
1500 | 0 | df, Mul(Load(df, row_blurred_r + x), intensity_target_multiplier), |
1501 | 0 | Mul(Load(df, row_blurred_g + x), intensity_target_multiplier), |
1502 | 0 | Mul(Load(df, row_blurred_b + x), intensity_target_multiplier), |
1503 | 0 | &pre_mixed0, &pre_mixed1, &pre_mixed2); |
1504 | 0 | pre_mixed0 = Max(pre_mixed0, min); |
1505 | 0 | pre_mixed1 = Max(pre_mixed1, min); |
1506 | 0 | pre_mixed2 = Max(pre_mixed2, min); |
1507 | 0 | sensitivity0 = Div(Gamma(df, pre_mixed0), pre_mixed0); |
1508 | 0 | sensitivity1 = Div(Gamma(df, pre_mixed1), pre_mixed1); |
1509 | 0 | sensitivity2 = Div(Gamma(df, pre_mixed2), pre_mixed2); |
1510 | 0 | sensitivity0 = Max(sensitivity0, min); |
1511 | 0 | sensitivity1 = Max(sensitivity1, min); |
1512 | 0 | sensitivity2 = Max(sensitivity2, min); |
1513 | 0 | } |
1514 | 0 | auto cur_mixed0 = Undefined(df); |
1515 | 0 | auto cur_mixed1 = Undefined(df); |
1516 | 0 | auto cur_mixed2 = Undefined(df); |
1517 | 0 | OpsinAbsorbance<false>( |
1518 | 0 | df, Mul(Load(df, row_r + x), intensity_target_multiplier), |
1519 | 0 | Mul(Load(df, row_g + x), intensity_target_multiplier), |
1520 | 0 | Mul(Load(df, row_b + x), intensity_target_multiplier), &cur_mixed0, |
1521 | 0 | &cur_mixed1, &cur_mixed2); |
1522 | 0 | cur_mixed0 = Mul(cur_mixed0, sensitivity0); |
1523 | 0 | cur_mixed1 = Mul(cur_mixed1, sensitivity1); |
1524 | 0 | cur_mixed2 = Mul(cur_mixed2, sensitivity2); |
1525 | | // This is a kludge. The negative values should be zeroed away before |
1526 | | // blurring. Ideally there would be no negative values in the first place. |
1527 | 0 | const auto min01 = Set(df, 1.7557483643287353f); |
1528 | 0 | const auto min2 = Set(df, 12.226454707163354f); |
1529 | 0 | cur_mixed0 = Max(cur_mixed0, min01); |
1530 | 0 | cur_mixed1 = Max(cur_mixed1, min01); |
1531 | 0 | cur_mixed2 = Max(cur_mixed2, min2); |
1532 | |
|
1533 | 0 | Store(Sub(cur_mixed0, cur_mixed1), df, row_out_x + x); |
1534 | 0 | Store(Add(cur_mixed0, cur_mixed1), df, row_out_y + x); |
1535 | 0 | Store(cur_mixed2, df, row_out_b + x); |
1536 | 0 | } |
1537 | 0 | } |
1538 | 0 | return true; |
1539 | 0 | } Unexecuted instantiation: jxl::N_SSE4::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Image3<float>*) Unexecuted instantiation: jxl::N_AVX2::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Image3<float>*) Unexecuted instantiation: jxl::N_SSE2::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Image3<float>*) |
1540 | | |
1541 | | Status ButteraugliDiffmapInPlace(Image3F& image0, Image3F& image1, |
1542 | | const ButteraugliParams& params, |
1543 | 0 | ImageF& diffmap) { |
1544 | | // image0 and image1 are in linear sRGB color space |
1545 | 0 | const size_t xsize = image0.xsize(); |
1546 | 0 | const size_t ysize = image0.ysize(); |
1547 | 0 | JxlMemoryManager* memory_manager = image0.memory_manager(); |
1548 | 0 | BlurTemp blur_temp; |
1549 | 0 | { |
1550 | | // Convert image0 and image1 to XYB in-place |
1551 | 0 | JXL_ASSIGN_OR_RETURN(Image3F temp, |
1552 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1553 | 0 | JXL_RETURN_IF_ERROR( |
1554 | 0 | OpsinDynamicsImage(image0, params, &temp, &blur_temp, &image0)); |
1555 | 0 | JXL_RETURN_IF_ERROR( |
1556 | 0 | OpsinDynamicsImage(image1, params, &temp, &blur_temp, &image1)); |
1557 | 0 | } |
1558 | | // image0 and image1 are in XYB color space |
1559 | 0 | JXL_ASSIGN_OR_RETURN(ImageF block_diff_dc, |
1560 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1561 | 0 | ZeroFillImage(&block_diff_dc); |
1562 | 0 | { |
1563 | | // separate out LF components from image0 and image1 and compute the dc |
1564 | | // diff image from them |
1565 | 0 | JXL_ASSIGN_OR_RETURN(Image3F lf0, |
1566 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1567 | 0 | JXL_ASSIGN_OR_RETURN(Image3F lf1, |
1568 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1569 | 0 | JXL_RETURN_IF_ERROR( |
1570 | 0 | SeparateLFAndMF(params, image0, &lf0, &image0, &blur_temp)); |
1571 | 0 | JXL_RETURN_IF_ERROR( |
1572 | 0 | SeparateLFAndMF(params, image1, &lf1, &image1, &blur_temp)); |
1573 | 0 | for (size_t c = 0; c < 3; ++c) { |
1574 | 0 | L2Diff(lf0.Plane(c), lf1.Plane(c), wmul[6 + c], &block_diff_dc); |
1575 | 0 | } |
1576 | 0 | } |
1577 | | // image0 and image1 are MF residuals (before blurring) in XYB color space |
1578 | 0 | ImageF hf0[2]; |
1579 | 0 | ImageF hf1[2]; |
1580 | 0 | JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image0, &hf0[0], &blur_temp)); |
1581 | 0 | JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image1, &hf1[0], &blur_temp)); |
1582 | | // image0 and image1 are MF-images in XYB color space |
1583 | | |
1584 | 0 | JXL_ASSIGN_OR_RETURN(ImageF block_diff_ac, |
1585 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1586 | 0 | ZeroFillImage(&block_diff_ac); |
1587 | | // start accumulating ac diff image from MF images |
1588 | 0 | { |
1589 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diffs, |
1590 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1591 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(image0.Plane(1), image1.Plane(1), |
1592 | 0 | wMfMalta, wMfMalta, norm1Mf, &diffs, |
1593 | 0 | &block_diff_ac)); |
1594 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(image0.Plane(0), image1.Plane(0), |
1595 | 0 | wMfMaltaX, wMfMaltaX, norm1MfX, &diffs, |
1596 | 0 | &block_diff_ac)); |
1597 | 0 | } |
1598 | 0 | for (size_t c = 0; c < 3; ++c) { |
1599 | 0 | L2Diff(image0.Plane(c), image1.Plane(c), wmul[3 + c], &block_diff_ac); |
1600 | 0 | } |
1601 | | // we will not need the MF-images and more, so we deallocate them to reduce |
1602 | | // peak memory usage |
1603 | 0 | image0 = Image3F(); |
1604 | 0 | image1 = Image3F(); |
1605 | |
|
1606 | 0 | ImageF uhf0[2]; |
1607 | 0 | ImageF uhf1[2]; |
1608 | 0 | JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf0[0], &uhf0[0], &blur_temp)); |
1609 | 0 | JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf1[0], &uhf1[0], &blur_temp)); |
1610 | | |
1611 | | // continue accumulating ac diff image from HF and UHF images |
1612 | 0 | const float hf_asymmetry = params.hf_asymmetry; |
1613 | 0 | { |
1614 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diffs, |
1615 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1616 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMap(uhf0[1], uhf1[1], wUhfMalta * hf_asymmetry, |
1617 | 0 | wUhfMalta / hf_asymmetry, norm1Uhf, &diffs, |
1618 | 0 | &block_diff_ac)); |
1619 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMap( |
1620 | 0 | uhf0[0], uhf1[0], wUhfMaltaX * hf_asymmetry, wUhfMaltaX / hf_asymmetry, |
1621 | 0 | norm1UhfX, &diffs, &block_diff_ac)); |
1622 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF( |
1623 | 0 | hf0[1], hf1[1], wHfMalta * std::sqrt(hf_asymmetry), |
1624 | 0 | wHfMalta / std::sqrt(hf_asymmetry), norm1Hf, &diffs, &block_diff_ac)); |
1625 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF( |
1626 | 0 | hf0[0], hf1[0], wHfMaltaX * std::sqrt(hf_asymmetry), |
1627 | 0 | wHfMaltaX / std::sqrt(hf_asymmetry), norm1HfX, &diffs, &block_diff_ac)); |
1628 | 0 | } |
1629 | 0 | for (size_t c = 0; c < 2; ++c) { |
1630 | 0 | L2DiffAsymmetric(hf0[c], hf1[c], wmul[c] * hf_asymmetry, |
1631 | 0 | wmul[c] / hf_asymmetry, &block_diff_ac); |
1632 | 0 | } |
1633 | | |
1634 | | // compute mask image from HF and UHF X and Y images |
1635 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask, |
1636 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1637 | 0 | { |
1638 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask0, |
1639 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1640 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask1, |
1641 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1642 | 0 | CombineChannelsForMasking(&hf0[0], &uhf0[0], &mask0); |
1643 | 0 | CombineChannelsForMasking(&hf1[0], &uhf1[0], &mask1); |
1644 | 0 | DeallocateHFAndUHF(&hf1[0], &uhf1[0]); |
1645 | 0 | DeallocateHFAndUHF(&hf0[0], &uhf0[0]); |
1646 | 0 | JXL_RETURN_IF_ERROR( |
1647 | 0 | Mask(mask0, mask1, params, &blur_temp, &mask, &block_diff_ac)); |
1648 | 0 | } |
1649 | | |
1650 | | // compute final diffmap from mask image and ac and dc diff images |
1651 | 0 | JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(memory_manager, xsize, ysize)); |
1652 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1653 | 0 | const float* row_dc = block_diff_dc.Row(y); |
1654 | 0 | const float* row_ac = block_diff_ac.Row(y); |
1655 | 0 | float* row_out = diffmap.Row(y); |
1656 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1657 | 0 | const float val = mask.Row(y)[x]; |
1658 | 0 | row_out[x] = sqrt(row_dc[x] * MaskDcY(val) + row_ac[x] * MaskY(val)); |
1659 | 0 | } |
1660 | 0 | } |
1661 | 0 | return true; |
1662 | 0 | } Unexecuted instantiation: jxl::N_SSE4::ButteraugliDiffmapInPlace(jxl::Image3<float>&, jxl::Image3<float>&, jxl::ButteraugliParams const&, jxl::Plane<float>&) Unexecuted instantiation: jxl::N_AVX2::ButteraugliDiffmapInPlace(jxl::Image3<float>&, jxl::Image3<float>&, jxl::ButteraugliParams const&, jxl::Plane<float>&) Unexecuted instantiation: jxl::N_SSE2::ButteraugliDiffmapInPlace(jxl::Image3<float>&, jxl::Image3<float>&, jxl::ButteraugliParams const&, jxl::Plane<float>&) |
1663 | | |
1664 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
1665 | | } // namespace HWY_NAMESPACE |
1666 | | } // namespace jxl |
1667 | | HWY_AFTER_NAMESPACE(); |
1668 | | |
1669 | | #if HWY_ONCE |
1670 | | namespace jxl { |
1671 | | |
1672 | | HWY_EXPORT(SeparateFrequencies); // Local function. |
1673 | | HWY_EXPORT(MaskPsychoImage); // Local function. |
1674 | | HWY_EXPORT(L2DiffAsymmetric); // Local function. |
1675 | | HWY_EXPORT(L2Diff); // Local function. |
1676 | | HWY_EXPORT(SetL2Diff); // Local function. |
1677 | | HWY_EXPORT(CombineChannelsToDiffmap); // Local function. |
1678 | | HWY_EXPORT(MaltaDiffMap); // Local function. |
1679 | | HWY_EXPORT(MaltaDiffMapLF); // Local function. |
1680 | | HWY_EXPORT(OpsinDynamicsImage); // Local function. |
1681 | | HWY_EXPORT(ButteraugliDiffmapInPlace); // Local function. |
1682 | | |
1683 | | #if BUTTERAUGLI_ENABLE_CHECKS |
1684 | | |
1685 | | static inline bool IsNan(const float x) { |
1686 | | uint32_t bits; |
1687 | | memcpy(&bits, &x, sizeof(bits)); |
1688 | | const uint32_t bitmask_exp = 0x7F800000; |
1689 | | return (bits & bitmask_exp) == bitmask_exp && (bits & 0x7FFFFF); |
1690 | | } |
1691 | | |
1692 | | static inline bool IsNan(const double x) { |
1693 | | uint64_t bits; |
1694 | | memcpy(&bits, &x, sizeof(bits)); |
1695 | | return (0x7ff0000000000001ULL <= bits && bits <= 0x7fffffffffffffffULL) || |
1696 | | (0xfff0000000000001ULL <= bits && bits <= 0xffffffffffffffffULL); |
1697 | | } |
1698 | | |
1699 | | static inline void CheckImage(const ImageF& image, const char* name) { |
1700 | | for (size_t y = 0; y < image.ysize(); ++y) { |
1701 | | const float* BUTTERAUGLI_RESTRICT row = image.Row(y); |
1702 | | for (size_t x = 0; x < image.xsize(); ++x) { |
1703 | | if (IsNan(row[x])) { |
1704 | | printf("NAN: Image %s @ %" PRIuS ",%" PRIuS " (of %" PRIuS ",%" PRIuS |
1705 | | ")\n", |
1706 | | name, x, y, image.xsize(), image.ysize()); |
1707 | | exit(1); |
1708 | | } |
1709 | | } |
1710 | | } |
1711 | | } |
1712 | | |
1713 | | #define CHECK_NAN(x, str) \ |
1714 | | do { \ |
1715 | | if (IsNan(x)) { \ |
1716 | | printf("%d: %s\n", __LINE__, str); \ |
1717 | | abort(); \ |
1718 | | } \ |
1719 | | } while (0) |
1720 | | |
1721 | | #define CHECK_IMAGE(image, name) CheckImage(image, name) |
1722 | | |
1723 | | #else // BUTTERAUGLI_ENABLE_CHECKS |
1724 | | |
1725 | | #define CHECK_NAN(x, str) |
1726 | | #define CHECK_IMAGE(image, name) |
1727 | | |
1728 | | #endif // BUTTERAUGLI_ENABLE_CHECKS |
1729 | | |
1730 | | // Calculate a 2x2 subsampled image for purposes of recursive butteraugli at |
1731 | | // multiresolution. |
1732 | 0 | static StatusOr<Image3F> SubSample2x(const Image3F& in) { |
1733 | 0 | size_t xs = (in.xsize() + 1) / 2; |
1734 | 0 | size_t ys = (in.ysize() + 1) / 2; |
1735 | 0 | JxlMemoryManager* memory_manager = in.memory_manager(); |
1736 | 0 | JXL_ASSIGN_OR_RETURN(Image3F retval, Image3F::Create(memory_manager, xs, ys)); |
1737 | 0 | for (size_t c = 0; c < 3; ++c) { |
1738 | 0 | for (size_t y = 0; y < ys; ++y) { |
1739 | 0 | for (size_t x = 0; x < xs; ++x) { |
1740 | 0 | retval.PlaneRow(c, y)[x] = 0; |
1741 | 0 | } |
1742 | 0 | } |
1743 | 0 | } |
1744 | 0 | for (size_t c = 0; c < 3; ++c) { |
1745 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
1746 | 0 | for (size_t x = 0; x < in.xsize(); ++x) { |
1747 | 0 | retval.PlaneRow(c, y / 2)[x / 2] += 0.25f * in.PlaneRow(c, y)[x]; |
1748 | 0 | } |
1749 | 0 | } |
1750 | 0 | if ((in.xsize() & 1) != 0) { |
1751 | 0 | for (size_t y = 0; y < retval.ysize(); ++y) { |
1752 | 0 | size_t last_column = retval.xsize() - 1; |
1753 | 0 | retval.PlaneRow(c, y)[last_column] *= 2.0f; |
1754 | 0 | } |
1755 | 0 | } |
1756 | 0 | if ((in.ysize() & 1) != 0) { |
1757 | 0 | for (size_t x = 0; x < retval.xsize(); ++x) { |
1758 | 0 | size_t last_row = retval.ysize() - 1; |
1759 | 0 | retval.PlaneRow(c, last_row)[x] *= 2.0f; |
1760 | 0 | } |
1761 | 0 | } |
1762 | 0 | } |
1763 | 0 | return retval; |
1764 | 0 | } |
1765 | | |
1766 | | // Supersample src by 2x and add it to dest. |
1767 | 0 | static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) { |
1768 | 0 | for (size_t y = 0; y < dest.ysize(); ++y) { |
1769 | 0 | for (size_t x = 0; x < dest.xsize(); ++x) { |
1770 | | // There will be less errors from the more averaged images. |
1771 | | // We take it into account to some extent using a scaler. |
1772 | 0 | static const double kHeuristicMixingValue = 0.3; |
1773 | 0 | dest.Row(y)[x] *= 1.0 - kHeuristicMixingValue * w; |
1774 | 0 | dest.Row(y)[x] += w * src.Row(y / 2)[x / 2]; |
1775 | 0 | } |
1776 | 0 | } |
1777 | 0 | } |
1778 | | |
1779 | 0 | Image3F* ButteraugliComparator::Temp() const { |
1780 | 0 | bool was_in_use = temp_in_use_.test_and_set(std::memory_order_acq_rel); |
1781 | 0 | if (was_in_use) return nullptr; |
1782 | 0 | return &temp_; |
1783 | 0 | } |
1784 | | |
1785 | 0 | void ButteraugliComparator::ReleaseTemp() const { temp_in_use_.clear(); } |
1786 | | |
1787 | | ButteraugliComparator::ButteraugliComparator(size_t xsize, size_t ysize, |
1788 | | const ButteraugliParams& params) |
1789 | 0 | : xsize_(xsize), ysize_(ysize), params_(params) {} |
1790 | | |
1791 | | StatusOr<std::unique_ptr<ButteraugliComparator>> ButteraugliComparator::Make( |
1792 | 0 | const Image3F& rgb0, const ButteraugliParams& params) { |
1793 | 0 | size_t xsize = rgb0.xsize(); |
1794 | 0 | size_t ysize = rgb0.ysize(); |
1795 | 0 | JxlMemoryManager* memory_manager = rgb0.memory_manager(); |
1796 | 0 | std::unique_ptr<ButteraugliComparator> result = |
1797 | 0 | std::unique_ptr<ButteraugliComparator>( |
1798 | 0 | new ButteraugliComparator(xsize, ysize, params)); |
1799 | 0 | JXL_ASSIGN_OR_RETURN(result->temp_, |
1800 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1801 | |
|
1802 | 0 | if (xsize < 8 || ysize < 8) { |
1803 | 0 | return result; |
1804 | 0 | } |
1805 | | |
1806 | 0 | JXL_ASSIGN_OR_RETURN(Image3F xyb0, |
1807 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1808 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( |
1809 | 0 | rgb0, params, result->Temp(), &result->blur_temp_, &xyb0)); |
1810 | 0 | result->ReleaseTemp(); |
1811 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)( |
1812 | 0 | xsize, ysize, params, &result->blur_temp_, xyb0, result->pi0_)); |
1813 | | |
1814 | | // Awful recursive construction of samples of different resolution. |
1815 | | // This is an after-thought and possibly somewhat parallel in |
1816 | | // functionality with the PsychoImage multi-resolution approach. |
1817 | 0 | JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb0, SubSample2x(rgb0)); |
1818 | 0 | JXL_ASSIGN_OR_RETURN(result->sub_, |
1819 | 0 | ButteraugliComparator::Make(subsampledRgb0, params)); |
1820 | 0 | return result; |
1821 | 0 | } |
1822 | | |
1823 | 0 | Status ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const { |
1824 | 0 | return HWY_DYNAMIC_DISPATCH(MaskPsychoImage)( |
1825 | 0 | pi0_, pi0_, xsize_, ysize_, params_, &blur_temp_, mask, nullptr); |
1826 | 0 | } |
1827 | | |
1828 | | Status ButteraugliComparator::Diffmap(const Image3F& rgb1, |
1829 | 0 | ImageF& result) const { |
1830 | 0 | JxlMemoryManager* memory_manager = rgb1.memory_manager(); |
1831 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1832 | 0 | ZeroFillImage(&result); |
1833 | 0 | return true; |
1834 | 0 | } |
1835 | 0 | JXL_ASSIGN_OR_RETURN(Image3F xyb1, |
1836 | 0 | Image3F::Create(memory_manager, xsize_, ysize_)); |
1837 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( |
1838 | 0 | rgb1, params_, Temp(), &blur_temp_, &xyb1)); |
1839 | 0 | ReleaseTemp(); |
1840 | 0 | JXL_RETURN_IF_ERROR(DiffmapOpsinDynamicsImage(xyb1, result)); |
1841 | 0 | if (sub_) { |
1842 | 0 | if (sub_->xsize_ < 8 || sub_->ysize_ < 8) { |
1843 | 0 | return true; |
1844 | 0 | } |
1845 | 0 | JXL_ASSIGN_OR_RETURN( |
1846 | 0 | Image3F sub_xyb, |
1847 | 0 | Image3F::Create(memory_manager, sub_->xsize_, sub_->ysize_)); |
1848 | 0 | JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb1, SubSample2x(rgb1)); |
1849 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( |
1850 | 0 | subsampledRgb1, params_, sub_->Temp(), &sub_->blur_temp_, &sub_xyb)); |
1851 | 0 | sub_->ReleaseTemp(); |
1852 | 0 | ImageF subresult; |
1853 | 0 | JXL_RETURN_IF_ERROR(sub_->DiffmapOpsinDynamicsImage(sub_xyb, subresult)); |
1854 | 0 | AddSupersampled2x(subresult, 0.5, result); |
1855 | 0 | } |
1856 | 0 | return true; |
1857 | 0 | } |
1858 | | |
1859 | | Status ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1, |
1860 | 0 | ImageF& result) const { |
1861 | 0 | JxlMemoryManager* memory_manager = xyb1.memory_manager(); |
1862 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1863 | 0 | ZeroFillImage(&result); |
1864 | 0 | return true; |
1865 | 0 | } |
1866 | 0 | PsychoImage pi1; |
1867 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)( |
1868 | 0 | xsize_, ysize_, params_, &blur_temp_, xyb1, pi1)); |
1869 | 0 | JXL_ASSIGN_OR_RETURN(result, ImageF::Create(memory_manager, xsize_, ysize_)); |
1870 | 0 | return DiffmapPsychoImage(pi1, result); |
1871 | 0 | } |
1872 | | |
1873 | | namespace { |
1874 | | |
1875 | | Status MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, |
1876 | | const double w_0lt1, const double norm1, |
1877 | | ImageF* HWY_RESTRICT diffs, |
1878 | 0 | Image3F* HWY_RESTRICT block_diff_ac, size_t c) { |
1879 | 0 | return HWY_DYNAMIC_DISPATCH(MaltaDiffMap)(lum0, lum1, w_0gt1, w_0lt1, norm1, |
1880 | 0 | diffs, &block_diff_ac->Plane(c)); |
1881 | 0 | } |
1882 | | |
1883 | | Status MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, |
1884 | | const double w_0gt1, const double w_0lt1, |
1885 | | const double norm1, ImageF* HWY_RESTRICT diffs, |
1886 | 0 | Image3F* HWY_RESTRICT block_diff_ac, size_t c) { |
1887 | 0 | return HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF)(lum0, lum1, w_0gt1, w_0lt1, norm1, |
1888 | 0 | diffs, &block_diff_ac->Plane(c)); |
1889 | 0 | } |
1890 | | |
1891 | | } // namespace |
1892 | | |
1893 | | Status ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1, |
1894 | 0 | ImageF& diffmap) const { |
1895 | 0 | JxlMemoryManager* memory_manager = diffmap.memory_manager(); |
1896 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1897 | 0 | ZeroFillImage(&diffmap); |
1898 | 0 | return true; |
1899 | 0 | } |
1900 | | |
1901 | 0 | const float hf_asymmetry_ = params_.hf_asymmetry; |
1902 | 0 | const float xmul_ = params_.xmul; |
1903 | |
|
1904 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diffs, |
1905 | 0 | ImageF::Create(memory_manager, xsize_, ysize_)); |
1906 | 0 | JXL_ASSIGN_OR_RETURN(Image3F block_diff_ac, |
1907 | 0 | Image3F::Create(memory_manager, xsize_, ysize_)); |
1908 | 0 | ZeroFillImage(&block_diff_ac); |
1909 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMap( |
1910 | 0 | pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_, |
1911 | 0 | wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1)); |
1912 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMap( |
1913 | 0 | pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_, |
1914 | 0 | wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac, 0)); |
1915 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF( |
1916 | 0 | pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_), |
1917 | 0 | wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs, &block_diff_ac, 1)); |
1918 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], |
1919 | 0 | wHfMaltaX * std::sqrt(hf_asymmetry_), |
1920 | 0 | wHfMaltaX / std::sqrt(hf_asymmetry_), |
1921 | 0 | norm1HfX, &diffs, &block_diff_ac, 0)); |
1922 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), |
1923 | 0 | wMfMalta, wMfMalta, norm1Mf, &diffs, |
1924 | 0 | &block_diff_ac, 1)); |
1925 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), |
1926 | 0 | wMfMaltaX, wMfMaltaX, norm1MfX, &diffs, |
1927 | 0 | &block_diff_ac, 0)); |
1928 | | |
1929 | 0 | JXL_ASSIGN_OR_RETURN(Image3F block_diff_dc, |
1930 | 0 | Image3F::Create(memory_manager, xsize_, ysize_)); |
1931 | 0 | for (size_t c = 0; c < 3; ++c) { |
1932 | 0 | if (c < 2) { // No blue channel error accumulated at HF. |
1933 | 0 | HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric) |
1934 | 0 | (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_, |
1935 | 0 | &block_diff_ac.Plane(c)); |
1936 | 0 | } |
1937 | 0 | HWY_DYNAMIC_DISPATCH(L2Diff) |
1938 | 0 | (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac.Plane(c)); |
1939 | 0 | HWY_DYNAMIC_DISPATCH(SetL2Diff) |
1940 | 0 | (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc.Plane(c)); |
1941 | 0 | } |
1942 | |
|
1943 | 0 | ImageF mask; |
1944 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(MaskPsychoImage)( |
1945 | 0 | pi0_, pi1, xsize_, ysize_, params_, &blur_temp_, &mask, |
1946 | 0 | &block_diff_ac.Plane(1))); |
1947 | | |
1948 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap)( |
1949 | 0 | mask, block_diff_dc, block_diff_ac, xmul_, &diffmap)); |
1950 | 0 | return true; |
1951 | 0 | } |
1952 | | |
1953 | | double ButteraugliScoreFromDiffmap(const ImageF& diffmap, |
1954 | 0 | const ButteraugliParams* params) { |
1955 | 0 | float retval = 0.0f; |
1956 | 0 | for (size_t y = 0; y < diffmap.ysize(); ++y) { |
1957 | 0 | const float* BUTTERAUGLI_RESTRICT row = diffmap.ConstRow(y); |
1958 | 0 | for (size_t x = 0; x < diffmap.xsize(); ++x) { |
1959 | 0 | retval = std::max(retval, row[x]); |
1960 | 0 | } |
1961 | 0 | } |
1962 | 0 | return retval; |
1963 | 0 | } |
1964 | | |
1965 | | Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, |
1966 | 0 | double hf_asymmetry, double xmul, ImageF& diffmap) { |
1967 | 0 | ButteraugliParams params; |
1968 | 0 | params.hf_asymmetry = hf_asymmetry; |
1969 | 0 | params.xmul = xmul; |
1970 | 0 | return ButteraugliDiffmap(rgb0, rgb1, params, diffmap); |
1971 | 0 | } |
1972 | | |
1973 | | template <size_t kMax> |
1974 | | bool ButteraugliDiffmapSmall(const Image3F& rgb0, const Image3F& rgb1, |
1975 | 0 | const ButteraugliParams& params, ImageF& diffmap) { |
1976 | 0 | const size_t xsize = rgb0.xsize(); |
1977 | 0 | const size_t ysize = rgb0.ysize(); |
1978 | 0 | JxlMemoryManager* memory_manager = rgb0.memory_manager(); |
1979 | | // Butteraugli values for small (where xsize or ysize is smaller |
1980 | | // than 8 pixels) images are non-sensical, but most likely it is |
1981 | | // less disruptive to try to compute something than just give up. |
1982 | | // Temporarily extend the borders of the image to fit 8 x 8 size. |
1983 | 0 | size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0; |
1984 | 0 | size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0; |
1985 | 0 | size_t xscaled = std::max<size_t>(kMax, xsize); |
1986 | 0 | size_t yscaled = std::max<size_t>(kMax, ysize); |
1987 | 0 | JXL_ASSIGN_OR_RETURN(Image3F scaled0, |
1988 | 0 | Image3F::Create(memory_manager, xscaled, yscaled)); |
1989 | 0 | JXL_ASSIGN_OR_RETURN(Image3F scaled1, |
1990 | 0 | Image3F::Create(memory_manager, xscaled, yscaled)); |
1991 | 0 | for (int i = 0; i < 3; ++i) { |
1992 | 0 | for (size_t y = 0; y < yscaled; ++y) { |
1993 | 0 | for (size_t x = 0; x < xscaled; ++x) { |
1994 | 0 | size_t x2 = std::min<size_t>(xsize - 1, x > xborder ? x - xborder : 0); |
1995 | 0 | size_t y2 = std::min<size_t>(ysize - 1, y > yborder ? y - yborder : 0); |
1996 | 0 | scaled0.PlaneRow(i, y)[x] = rgb0.PlaneRow(i, y2)[x2]; |
1997 | 0 | scaled1.PlaneRow(i, y)[x] = rgb1.PlaneRow(i, y2)[x2]; |
1998 | 0 | } |
1999 | 0 | } |
2000 | 0 | } |
2001 | 0 | ImageF diffmap_scaled; |
2002 | 0 | const bool ok = ButteraugliDiffmap(scaled0, scaled1, params, diffmap_scaled); |
2003 | 0 | JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(memory_manager, xsize, ysize)); |
2004 | 0 | for (size_t y = 0; y < ysize; ++y) { |
2005 | 0 | for (size_t x = 0; x < xsize; ++x) { |
2006 | 0 | diffmap.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder]; |
2007 | 0 | } |
2008 | 0 | } |
2009 | 0 | return ok; |
2010 | 0 | } |
2011 | | |
2012 | | Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, |
2013 | 0 | const ButteraugliParams& params, ImageF& diffmap) { |
2014 | 0 | const size_t xsize = rgb0.xsize(); |
2015 | 0 | const size_t ysize = rgb0.ysize(); |
2016 | 0 | if (xsize < 1 || ysize < 1) { |
2017 | 0 | return JXL_FAILURE("Zero-sized image"); |
2018 | 0 | } |
2019 | 0 | if (!SameSize(rgb0, rgb1)) { |
2020 | 0 | return JXL_FAILURE("Size mismatch"); |
2021 | 0 | } |
2022 | 0 | static const int kMax = 8; |
2023 | 0 | if (xsize < kMax || ysize < kMax) { |
2024 | 0 | return ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap); |
2025 | 0 | } |
2026 | 0 | JXL_ASSIGN_OR_RETURN(std::unique_ptr<ButteraugliComparator> butteraugli, |
2027 | 0 | ButteraugliComparator::Make(rgb0, params)); |
2028 | 0 | JXL_RETURN_IF_ERROR(butteraugli->Diffmap(rgb1, diffmap)); |
2029 | 0 | return true; |
2030 | 0 | } |
2031 | | |
2032 | | bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, |
2033 | | float hf_asymmetry, float xmul, ImageF& diffmap, |
2034 | 0 | double& diffvalue) { |
2035 | 0 | ButteraugliParams params; |
2036 | 0 | params.hf_asymmetry = hf_asymmetry; |
2037 | 0 | params.xmul = xmul; |
2038 | 0 | return ButteraugliInterface(rgb0, rgb1, params, diffmap, diffvalue); |
2039 | 0 | } |
2040 | | |
2041 | | bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, |
2042 | | const ButteraugliParams& params, ImageF& diffmap, |
2043 | 0 | double& diffvalue) { |
2044 | 0 | if (!ButteraugliDiffmap(rgb0, rgb1, params, diffmap)) { |
2045 | 0 | return false; |
2046 | 0 | } |
2047 | 0 | diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); |
2048 | 0 | return true; |
2049 | 0 | } |
2050 | | |
2051 | | Status ButteraugliInterfaceInPlace(Image3F&& rgb0, Image3F&& rgb1, |
2052 | | const ButteraugliParams& params, |
2053 | 0 | ImageF& diffmap, double& diffvalue) { |
2054 | 0 | const size_t xsize = rgb0.xsize(); |
2055 | 0 | const size_t ysize = rgb0.ysize(); |
2056 | 0 | if (xsize < 1 || ysize < 1) { |
2057 | 0 | return JXL_FAILURE("Zero-sized image"); |
2058 | 0 | } |
2059 | 0 | if (!SameSize(rgb0, rgb1)) { |
2060 | 0 | return JXL_FAILURE("Size mismatch"); |
2061 | 0 | } |
2062 | 0 | static const int kMax = 8; |
2063 | 0 | if (xsize < kMax || ysize < kMax) { |
2064 | 0 | bool ok = ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap); |
2065 | 0 | diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); |
2066 | 0 | return ok; |
2067 | 0 | } |
2068 | 0 | ImageF subdiffmap; |
2069 | 0 | if (xsize >= 15 && ysize >= 15) { |
2070 | 0 | JXL_ASSIGN_OR_RETURN(Image3F rgb0_sub, SubSample2x(rgb0)); |
2071 | 0 | JXL_ASSIGN_OR_RETURN(Image3F rgb1_sub, SubSample2x(rgb1)); |
2072 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)( |
2073 | 0 | rgb0_sub, rgb1_sub, params, subdiffmap)); |
2074 | 0 | } |
2075 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)( |
2076 | 0 | rgb0, rgb1, params, diffmap)); |
2077 | 0 | if (xsize >= 15 && ysize >= 15) { |
2078 | 0 | AddSupersampled2x(subdiffmap, 0.5, diffmap); |
2079 | 0 | } |
2080 | 0 | diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); |
2081 | 0 | return true; |
2082 | 0 | } |
2083 | | |
2084 | 0 | double ButteraugliFuzzyClass(double score) { |
2085 | 0 | static const double fuzzy_width_up = 4.8; |
2086 | 0 | static const double fuzzy_width_down = 4.8; |
2087 | 0 | static const double m0 = 2.0; |
2088 | 0 | static const double scaler = 0.7777; |
2089 | 0 | double val; |
2090 | 0 | if (score < 1.0) { |
2091 | | // val in [scaler .. 2.0] |
2092 | 0 | val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_down)); |
2093 | 0 | val -= 1.0; // from [1 .. 2] to [0 .. 1] |
2094 | 0 | val *= 2.0 - scaler; // from [0 .. 1] to [0 .. 2.0 - scaler] |
2095 | 0 | val += scaler; // from [0 .. 2.0 - scaler] to [scaler .. 2.0] |
2096 | 0 | } else { |
2097 | | // val in [0 .. scaler] |
2098 | 0 | val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_up)); |
2099 | 0 | val *= scaler; |
2100 | 0 | } |
2101 | 0 | return val; |
2102 | 0 | } |
2103 | | |
2104 | | // #define PRINT_OUT_NORMALIZATION |
2105 | | |
2106 | 0 | double ButteraugliFuzzyInverse(double seek) { |
2107 | 0 | double pos = 0; |
2108 | | // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter) |
2109 | 0 | for (double range = 1.0; range >= 1e-10; range *= 0.5) { |
2110 | 0 | double cur = ButteraugliFuzzyClass(pos); |
2111 | 0 | if (cur < seek) { |
2112 | 0 | pos -= range; |
2113 | 0 | } else { |
2114 | 0 | pos += range; |
2115 | 0 | } |
2116 | 0 | } |
2117 | | #ifdef PRINT_OUT_NORMALIZATION |
2118 | | if (seek == 1.0) { |
2119 | | fprintf(stderr, "Fuzzy inverse %g\n", pos); |
2120 | | } |
2121 | | #endif |
2122 | 0 | return pos; |
2123 | 0 | } |
2124 | | |
2125 | | #ifdef PRINT_OUT_NORMALIZATION |
2126 | | static double print_out_normalization = ButteraugliFuzzyInverse(1.0); |
2127 | | #endif |
2128 | | |
2129 | | namespace { |
2130 | | |
2131 | | void ScoreToRgb(double score, double good_threshold, double bad_threshold, |
2132 | 0 | float rgb[3]) { |
2133 | 0 | double heatmap[12][3] = { |
2134 | 0 | {0, 0, 0}, {0, 0, 1}, |
2135 | 0 | {0, 1, 1}, {0, 1, 0}, // Good level |
2136 | 0 | {1, 1, 0}, {1, 0, 0}, // Bad level |
2137 | 0 | {1, 0, 1}, {0.5, 0.5, 1.0}, |
2138 | 0 | {1.0, 0.5, 0.5}, // Pastel colors for the very bad quality range. |
2139 | 0 | {1.0, 1.0, 0.5}, {1, 1, 1}, |
2140 | 0 | {1, 1, 1}, // Last color repeated to have a solid range of white. |
2141 | 0 | }; |
2142 | 0 | if (score < good_threshold) { |
2143 | 0 | score = (score / good_threshold) * 0.3; |
2144 | 0 | } else if (score < bad_threshold) { |
2145 | 0 | score = 0.3 + |
2146 | 0 | (score - good_threshold) / (bad_threshold - good_threshold) * 0.15; |
2147 | 0 | } else { |
2148 | 0 | score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5; |
2149 | 0 | } |
2150 | 0 | static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]); |
2151 | 0 | score = jxl::Clamp1<double>(score * (kTableSize - 1), 0.0, kTableSize - 2); |
2152 | 0 | int ix = static_cast<int>(score); |
2153 | 0 | ix = jxl::Clamp1(ix, 0, kTableSize - 2); // Handle NaN |
2154 | 0 | double mix = score - ix; |
2155 | 0 | for (int i = 0; i < 3; ++i) { |
2156 | 0 | double v = mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i]; |
2157 | 0 | rgb[i] = pow(v, 0.5); |
2158 | 0 | } |
2159 | 0 | } |
2160 | | |
2161 | | } // namespace |
2162 | | |
2163 | | StatusOr<Image3F> CreateHeatMapImage(const ImageF& distmap, |
2164 | | double good_threshold, |
2165 | 0 | double bad_threshold) { |
2166 | 0 | JxlMemoryManager* memory_manager = distmap.memory_manager(); |
2167 | 0 | JXL_ASSIGN_OR_RETURN( |
2168 | 0 | Image3F heatmap, |
2169 | 0 | Image3F::Create(memory_manager, distmap.xsize(), distmap.ysize())); |
2170 | 0 | for (size_t y = 0; y < distmap.ysize(); ++y) { |
2171 | 0 | const float* BUTTERAUGLI_RESTRICT row_distmap = distmap.ConstRow(y); |
2172 | 0 | float* BUTTERAUGLI_RESTRICT row_h0 = heatmap.PlaneRow(0, y); |
2173 | 0 | float* BUTTERAUGLI_RESTRICT row_h1 = heatmap.PlaneRow(1, y); |
2174 | 0 | float* BUTTERAUGLI_RESTRICT row_h2 = heatmap.PlaneRow(2, y); |
2175 | 0 | for (size_t x = 0; x < distmap.xsize(); ++x) { |
2176 | 0 | const float d = row_distmap[x]; |
2177 | 0 | float rgb[3]; |
2178 | 0 | ScoreToRgb(d, good_threshold, bad_threshold, rgb); |
2179 | 0 | row_h0[x] = rgb[0]; |
2180 | 0 | row_h1[x] = rgb[1]; |
2181 | 0 | row_h2[x] = rgb[2]; |
2182 | 0 | } |
2183 | 0 | } |
2184 | 0 | return heatmap; |
2185 | 0 | } |
2186 | | |
2187 | | } // namespace jxl |
2188 | | #endif // HWY_ONCE |