/src/libjxl/lib/jxl/butteraugli/butteraugli.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | // |
6 | | // Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com) |
7 | | // |
8 | | // The physical architecture of butteraugli is based on the following naming |
9 | | // convention: |
10 | | // * Opsin - dynamics of the photosensitive chemicals in the retina |
11 | | // with their immediate electrical processing |
12 | | // * Xyb - hybrid opponent/trichromatic color space |
13 | | // x is roughly red-subtract-green. |
14 | | // y is yellow. |
15 | | // b is blue. |
16 | | // Xyb values are computed from Opsin mixing, not directly from rgb. |
17 | | // * Mask - for visual masking |
18 | | // * Hf - color modeling for spatially high-frequency features |
19 | | // * Lf - color modeling for spatially low-frequency features |
20 | | // * Diffmap - to cluster and build an image of error between the images |
21 | | // * Blur - to hold the smoothing code |
22 | | |
23 | | #include "lib/jxl/butteraugli/butteraugli.h" |
24 | | |
25 | | #include <stdint.h> |
26 | | #include <stdio.h> |
27 | | #include <stdlib.h> |
28 | | #include <string.h> |
29 | | |
30 | | #include <algorithm> |
31 | | #include <array> |
32 | | #include <cmath> |
33 | | #include <new> |
34 | | #include <vector> |
35 | | |
36 | | #if PROFILER_ENABLED |
37 | | #include <chrono> |
38 | | #endif // PROFILER_ENABLED |
39 | | |
40 | | #undef HWY_TARGET_INCLUDE |
41 | | #define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc" |
42 | | #include <hwy/foreach_target.h> |
43 | | |
44 | | #include "lib/jxl/base/printf_macros.h" |
45 | | #include "lib/jxl/base/profiler.h" |
46 | | #include "lib/jxl/base/status.h" |
47 | | #include "lib/jxl/convolve.h" |
48 | | #include "lib/jxl/fast_math-inl.h" |
49 | | #include "lib/jxl/gauss_blur.h" |
50 | | #include "lib/jxl/image_ops.h" |
51 | | |
52 | | #ifndef JXL_BUTTERAUGLI_ONCE |
53 | | #define JXL_BUTTERAUGLI_ONCE |
54 | | |
55 | | namespace jxl { |
56 | | |
57 | 0 | std::vector<float> ComputeKernel(float sigma) { |
58 | 0 | const float m = 2.25; // Accuracy increases when m is increased. |
59 | 0 | const double scaler = -1.0 / (2.0 * sigma * sigma); |
60 | 0 | const int diff = std::max<int>(1, m * std::fabs(sigma)); |
61 | 0 | std::vector<float> kernel(2 * diff + 1); |
62 | 0 | for (int i = -diff; i <= diff; ++i) { |
63 | 0 | kernel[i + diff] = std::exp(scaler * i * i); |
64 | 0 | } |
65 | 0 | return kernel; |
66 | 0 | } |
67 | | |
68 | | void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel, |
69 | 0 | const size_t x, float* BUTTERAUGLI_RESTRICT row_out) { |
70 | 0 | const size_t offset = kernel.size() / 2; |
71 | 0 | int minx = x < offset ? 0 : x - offset; |
72 | 0 | int maxx = std::min<int>(in.xsize() - 1, x + offset); |
73 | 0 | float weight = 0.0f; |
74 | 0 | for (int j = minx; j <= maxx; ++j) { |
75 | 0 | weight += kernel[j - x + offset]; |
76 | 0 | } |
77 | 0 | float scale = 1.0f / weight; |
78 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
79 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y); |
80 | 0 | float sum = 0.0f; |
81 | 0 | for (int j = minx; j <= maxx; ++j) { |
82 | 0 | sum += row_in[j] * kernel[j - x + offset]; |
83 | 0 | } |
84 | 0 | row_out[y] = sum * scale; |
85 | 0 | } |
86 | 0 | } |
87 | | |
88 | | // Computes a horizontal convolution and transposes the result. |
89 | | void ConvolutionWithTranspose(const ImageF& in, |
90 | | const std::vector<float>& kernel, |
91 | 0 | ImageF* BUTTERAUGLI_RESTRICT out) { |
92 | 0 | PROFILER_FUNC; |
93 | 0 | JXL_CHECK(out->xsize() == in.ysize()); |
94 | 0 | JXL_CHECK(out->ysize() == in.xsize()); |
95 | 0 | const size_t len = kernel.size(); |
96 | 0 | const size_t offset = len / 2; |
97 | 0 | float weight_no_border = 0.0f; |
98 | 0 | for (size_t j = 0; j < len; ++j) { |
99 | 0 | weight_no_border += kernel[j]; |
100 | 0 | } |
101 | 0 | const float scale_no_border = 1.0f / weight_no_border; |
102 | 0 | const size_t border1 = std::min(in.xsize(), offset); |
103 | 0 | const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0; |
104 | 0 | std::vector<float> scaled_kernel(len / 2 + 1); |
105 | 0 | for (size_t i = 0; i <= len / 2; ++i) { |
106 | 0 | scaled_kernel[i] = kernel[i] * scale_no_border; |
107 | 0 | } |
108 | | |
109 | | // middle |
110 | 0 | switch (len) { |
111 | 0 | case 7: { |
112 | 0 | PROFILER_ZONE("conv7"); |
113 | 0 | const float sk0 = scaled_kernel[0]; |
114 | 0 | const float sk1 = scaled_kernel[1]; |
115 | 0 | const float sk2 = scaled_kernel[2]; |
116 | 0 | const float sk3 = scaled_kernel[3]; |
117 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
118 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
119 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
120 | 0 | const float sum0 = (row_in[0] + row_in[6]) * sk0; |
121 | 0 | const float sum1 = (row_in[1] + row_in[5]) * sk1; |
122 | 0 | const float sum2 = (row_in[2] + row_in[4]) * sk2; |
123 | 0 | const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2; |
124 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
125 | 0 | row_out[y] = sum; |
126 | 0 | } |
127 | 0 | } |
128 | 0 | } break; |
129 | 0 | case 13: { |
130 | 0 | PROFILER_ZONE("conv15"); |
131 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
132 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
133 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
134 | 0 | float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0]; |
135 | 0 | float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1]; |
136 | 0 | float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2]; |
137 | 0 | float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3]; |
138 | 0 | sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4]; |
139 | 0 | sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5]; |
140 | 0 | const float sum = (row_in[6]) * scaled_kernel[6]; |
141 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
142 | 0 | row_out[y] = sum + sum0 + sum1 + sum2 + sum3; |
143 | 0 | } |
144 | 0 | } |
145 | 0 | break; |
146 | 0 | } |
147 | 0 | case 15: { |
148 | 0 | PROFILER_ZONE("conv15"); |
149 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
150 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
151 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
152 | 0 | float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0]; |
153 | 0 | float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1]; |
154 | 0 | float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2]; |
155 | 0 | float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3]; |
156 | 0 | sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4]; |
157 | 0 | sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5]; |
158 | 0 | sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6]; |
159 | 0 | const float sum = (row_in[7]) * scaled_kernel[7]; |
160 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
161 | 0 | row_out[y] = sum + sum0 + sum1 + sum2 + sum3; |
162 | 0 | } |
163 | 0 | } |
164 | 0 | break; |
165 | 0 | } |
166 | 0 | case 33: { |
167 | 0 | PROFILER_ZONE("conv33"); |
168 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
169 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
170 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
171 | 0 | float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0]; |
172 | 0 | float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1]; |
173 | 0 | float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2]; |
174 | 0 | float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3]; |
175 | 0 | sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4]; |
176 | 0 | sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5]; |
177 | 0 | sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6]; |
178 | 0 | sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7]; |
179 | 0 | sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8]; |
180 | 0 | sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9]; |
181 | 0 | sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10]; |
182 | 0 | sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11]; |
183 | 0 | sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12]; |
184 | 0 | sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13]; |
185 | 0 | sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14]; |
186 | 0 | sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15]; |
187 | 0 | const float sum = (row_in[16]) * scaled_kernel[16]; |
188 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
189 | 0 | row_out[y] = sum + sum0 + sum1 + sum2 + sum3; |
190 | 0 | } |
191 | 0 | } |
192 | 0 | break; |
193 | 0 | } |
194 | 0 | default: |
195 | 0 | printf("Warning: Unexpected kernel size! %" PRIuS "\n", len); |
196 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
197 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y); |
198 | 0 | for (size_t x = border1; x < border2; ++x) { |
199 | 0 | const int d = x - offset; |
200 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
201 | 0 | float sum = 0.0f; |
202 | 0 | size_t j; |
203 | 0 | for (j = 0; j <= len / 2; ++j) { |
204 | 0 | sum += row_in[d + j] * scaled_kernel[j]; |
205 | 0 | } |
206 | 0 | for (; j < len; ++j) { |
207 | 0 | sum += row_in[d + j] * scaled_kernel[len - 1 - j]; |
208 | 0 | } |
209 | 0 | row_out[y] = sum; |
210 | 0 | } |
211 | 0 | } |
212 | 0 | } |
213 | | // left border |
214 | 0 | for (size_t x = 0; x < border1; ++x) { |
215 | 0 | ConvolveBorderColumn(in, kernel, x, out->Row(x)); |
216 | 0 | } |
217 | | |
218 | | // right border |
219 | 0 | for (size_t x = border2; x < in.xsize(); ++x) { |
220 | 0 | ConvolveBorderColumn(in, kernel, x, out->Row(x)); |
221 | 0 | } |
222 | 0 | } |
223 | | |
224 | | // A blur somewhat similar to a 2D Gaussian blur. |
225 | | // See: https://en.wikipedia.org/wiki/Gaussian_blur |
226 | | // |
227 | | // This is a bottleneck because the sigma can be quite large (>7). We can use |
228 | | // gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated |
229 | | // Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are |
230 | | // zero-valued. This leads to noticeable differences at the edges of diffmaps. |
231 | | // We retain a special case for 5x5 kernels (even faster than gauss_blur), |
232 | | // optionally use gauss_blur followed by fixup of the borders for large images, |
233 | | // or fall back to the previous truncated FIR followed by a transpose. |
234 | | void Blur(const ImageF& in, float sigma, const ButteraugliParams& params, |
235 | 0 | BlurTemp* temp, ImageF* out) { |
236 | 0 | std::vector<float> kernel = ComputeKernel(sigma); |
237 | | // Separable5 does an in-place convolution, so this fast path is not safe if |
238 | | // in aliases out. |
239 | 0 | if (kernel.size() == 5 && &in != out) { |
240 | 0 | float sum_weights = 0.0f; |
241 | 0 | for (const float w : kernel) { |
242 | 0 | sum_weights += w; |
243 | 0 | } |
244 | 0 | const float scale = 1.0f / sum_weights; |
245 | 0 | const float w0 = kernel[2] * scale; |
246 | 0 | const float w1 = kernel[1] * scale; |
247 | 0 | const float w2 = kernel[0] * scale; |
248 | 0 | const WeightsSeparable5 weights = { |
249 | 0 | {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, |
250 | 0 | {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, |
251 | 0 | }; |
252 | 0 | Separable5(in, Rect(in), weights, /*pool=*/nullptr, out); |
253 | 0 | return; |
254 | 0 | } |
255 | | |
256 | 0 | ImageF* JXL_RESTRICT temp_t = temp->GetTransposed(in); |
257 | 0 | ConvolutionWithTranspose(in, kernel, temp_t); |
258 | 0 | ConvolutionWithTranspose(*temp_t, kernel, out); |
259 | 0 | } |
260 | | |
261 | | // Allows PaddedMaltaUnit to call either function via overloading. |
262 | | struct MaltaTagLF {}; |
263 | | struct MaltaTag {}; |
264 | | |
265 | | } // namespace jxl |
266 | | |
267 | | #endif // JXL_BUTTERAUGLI_ONCE |
268 | | |
269 | | #include <hwy/highway.h> |
270 | | HWY_BEFORE_NAMESPACE(); |
271 | | namespace jxl { |
272 | | namespace HWY_NAMESPACE { |
273 | | |
274 | | // These templates are not found via ADL. |
275 | | using hwy::HWY_NAMESPACE::Abs; |
276 | | using hwy::HWY_NAMESPACE::Div; |
277 | | using hwy::HWY_NAMESPACE::Gt; |
278 | | using hwy::HWY_NAMESPACE::IfThenElse; |
279 | | using hwy::HWY_NAMESPACE::IfThenElseZero; |
280 | | using hwy::HWY_NAMESPACE::Lt; |
281 | | using hwy::HWY_NAMESPACE::Max; |
282 | | using hwy::HWY_NAMESPACE::Mul; |
283 | | using hwy::HWY_NAMESPACE::MulAdd; |
284 | | using hwy::HWY_NAMESPACE::MulSub; |
285 | | using hwy::HWY_NAMESPACE::Neg; |
286 | | using hwy::HWY_NAMESPACE::Sub; |
287 | | using hwy::HWY_NAMESPACE::Vec; |
288 | | using hwy::HWY_NAMESPACE::ZeroIfNegative; |
289 | | |
290 | | template <class D, class V> |
291 | 0 | HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) { |
292 | 0 | static const double kMul = 0.724216145665; |
293 | 0 | const V mul = Set(d, kMul); |
294 | 0 | const V maxval = Set(d, kMaxVal); |
295 | | // If greater than maxval or less than -maxval, replace with if_*. |
296 | 0 | const V if_pos = MulAdd(Sub(v, maxval), mul, maxval); |
297 | 0 | const V if_neg = MulSub(Add(v, maxval), mul, maxval); |
298 | 0 | const V pos_or_v = IfThenElse(Ge(v, maxval), if_pos, v); |
299 | 0 | return IfThenElse(Lt(v, Neg(maxval)), if_neg, pos_or_v); |
300 | 0 | } Unexecuted instantiation: hwy::N_AVX3::Vec512<float> jxl::N_AVX3::MaximumClamp<hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float>, double) Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::MaximumClamp<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, double) Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::MaximumClamp<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, double) Unexecuted instantiation: hwy::N_EMU128::Vec128<float, 4ul> jxl::N_EMU128::MaximumClamp<hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul>, double) |
301 | | |
302 | | // Make area around zero less important (remove it). |
303 | | template <class D, class V> |
304 | 0 | HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) { |
305 | 0 | const auto w = Set(d, kw); |
306 | 0 | return IfThenElse(Gt(x, w), Sub(x, w), |
307 | 0 | IfThenElseZero(Lt(x, Neg(w)), Add(x, w))); |
308 | 0 | } Unexecuted instantiation: hwy::N_AVX3::Vec512<float> jxl::N_AVX3::RemoveRangeAroundZero<hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Simd<float, 16ul, 0>, double, hwy::N_AVX3::Vec512<float>) Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::RemoveRangeAroundZero<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, double, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::RemoveRangeAroundZero<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, double, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: hwy::N_EMU128::Vec128<float, 4ul> jxl::N_EMU128::RemoveRangeAroundZero<hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Simd<float, 4ul, 0>, double, hwy::N_EMU128::Vec128<float, 4ul>) |
309 | | |
310 | | // Make area around zero more important (2x it until the limit). |
311 | | template <class D, class V> |
312 | 0 | HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) { |
313 | 0 | const auto w = Set(d, kw); |
314 | 0 | return IfThenElse(Gt(x, w), Add(x, w), |
315 | 0 | IfThenElse(Lt(x, Neg(w)), Sub(x, w), Add(x, x))); |
316 | 0 | } Unexecuted instantiation: hwy::N_AVX3::Vec512<float> jxl::N_AVX3::AmplifyRangeAroundZero<hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Simd<float, 16ul, 0>, double, hwy::N_AVX3::Vec512<float>) Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::AmplifyRangeAroundZero<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, double, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::AmplifyRangeAroundZero<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, double, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: hwy::N_EMU128::Vec128<float, 4ul> jxl::N_EMU128::AmplifyRangeAroundZero<hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Simd<float, 4ul, 0>, double, hwy::N_EMU128::Vec128<float, 4ul>) |
317 | | |
318 | | // XybLowFreqToVals converts from low-frequency XYB space to the 'vals' space. |
319 | | // Vals space can be converted to L2-norm space (Euclidean and normalized) |
320 | | // through visual masking. |
321 | | template <class D, class V> |
322 | | HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y, |
323 | | const V& b_arg, V* HWY_RESTRICT valx, |
324 | 0 | V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) { |
325 | 0 | static const double xmuli = 32.2217497012; |
326 | 0 | static const double ymuli = 13.7697791434; |
327 | 0 | static const double bmuli = 47.504615728; |
328 | 0 | static const double y_to_b_muli = -0.362267051518; |
329 | 0 | const V xmul = Set(d, xmuli); |
330 | 0 | const V ymul = Set(d, ymuli); |
331 | 0 | const V bmul = Set(d, bmuli); |
332 | 0 | const V y_to_b_mul = Set(d, y_to_b_muli); |
333 | 0 | const V b = MulAdd(y_to_b_mul, y, b_arg); |
334 | 0 | *valb = Mul(b, bmul); |
335 | 0 | *valx = Mul(x, xmul); |
336 | 0 | *valy = Mul(y, ymul); |
337 | 0 | } Unexecuted instantiation: void jxl::N_AVX3::XybLowFreqToVals<hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> const&, hwy::N_AVX3::Vec512<float> const&, hwy::N_AVX3::Vec512<float> const&, hwy::N_AVX3::Vec512<float>*, hwy::N_AVX3::Vec512<float>*, hwy::N_AVX3::Vec512<float>*) Unexecuted instantiation: void jxl::N_AVX2::XybLowFreqToVals<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*) Unexecuted instantiation: void jxl::N_SSE4::XybLowFreqToVals<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*) Unexecuted instantiation: void jxl::N_EMU128::XybLowFreqToVals<hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> const&, hwy::N_EMU128::Vec128<float, 4ul> const&, hwy::N_EMU128::Vec128<float, 4ul> const&, hwy::N_EMU128::Vec128<float, 4ul>*, hwy::N_EMU128::Vec128<float, 4ul>*, hwy::N_EMU128::Vec128<float, 4ul>*) |
338 | | |
339 | | void SuppressXByY(const ImageF& in_x, const ImageF& in_y, const double yw, |
340 | 0 | ImageF* HWY_RESTRICT out) { |
341 | 0 | JXL_DASSERT(SameSize(in_x, in_y) && SameSize(in_x, *out)); |
342 | 0 | const size_t xsize = in_x.xsize(); |
343 | 0 | const size_t ysize = in_x.ysize(); |
344 | |
|
345 | 0 | const HWY_FULL(float) d; |
346 | 0 | static const double s = 0.653020556257; |
347 | 0 | const auto sv = Set(d, s); |
348 | 0 | const auto one_minus_s = Set(d, 1.0 - s); |
349 | 0 | const auto ywv = Set(d, yw); |
350 | |
|
351 | 0 | for (size_t y = 0; y < ysize; ++y) { |
352 | 0 | const float* HWY_RESTRICT row_x = in_x.ConstRow(y); |
353 | 0 | const float* HWY_RESTRICT row_y = in_y.ConstRow(y); |
354 | 0 | float* HWY_RESTRICT row_out = out->Row(y); |
355 | |
|
356 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
357 | 0 | const auto vx = Load(d, row_x + x); |
358 | 0 | const auto vy = Load(d, row_y + x); |
359 | 0 | const auto scaler = |
360 | 0 | MulAdd(Div(ywv, MulAdd(vy, vy, ywv)), one_minus_s, sv); |
361 | 0 | Store(Mul(scaler, vx), d, row_out + x); |
362 | 0 | } |
363 | 0 | } |
364 | 0 | } Unexecuted instantiation: jxl::N_SSE4::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float> const&, double, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float> const&, double, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX3::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float> const&, double, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_EMU128::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float> const&, double, jxl::Plane<float>*) |
365 | | |
366 | | static void SeparateFrequencies(size_t xsize, size_t ysize, |
367 | | const ButteraugliParams& params, |
368 | | BlurTemp* blur_temp, const Image3F& xyb, |
369 | 0 | PsychoImage& ps) { |
370 | 0 | PROFILER_FUNC; |
371 | 0 | const HWY_FULL(float) d; |
372 | | |
373 | | // Extract lf ... |
374 | 0 | static const double kSigmaLf = 7.15593339443; |
375 | 0 | static const double kSigmaHf = 3.22489901262; |
376 | 0 | static const double kSigmaUhf = 1.56416327805; |
377 | 0 | ps.mf = Image3F(xsize, ysize); |
378 | 0 | ps.hf[0] = ImageF(xsize, ysize); |
379 | 0 | ps.hf[1] = ImageF(xsize, ysize); |
380 | 0 | ps.lf = Image3F(xyb.xsize(), xyb.ysize()); |
381 | 0 | ps.mf = Image3F(xyb.xsize(), xyb.ysize()); |
382 | 0 | for (int i = 0; i < 3; ++i) { |
383 | 0 | Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &ps.lf.Plane(i)); |
384 | | |
385 | | // ... and keep everything else in mf. |
386 | 0 | for (size_t y = 0; y < ysize; ++y) { |
387 | 0 | const float* BUTTERAUGLI_RESTRICT row_xyb = xyb.PlaneRow(i, y); |
388 | 0 | const float* BUTTERAUGLI_RESTRICT row_lf = ps.lf.ConstPlaneRow(i, y); |
389 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y); |
390 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
391 | 0 | const auto mf = Sub(Load(d, row_xyb + x), Load(d, row_lf + x)); |
392 | 0 | Store(mf, d, row_mf + x); |
393 | 0 | } |
394 | 0 | } |
395 | 0 | if (i == 2) { |
396 | 0 | Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i)); |
397 | 0 | break; |
398 | 0 | } |
399 | | // Divide mf into mf and hf. |
400 | 0 | for (size_t y = 0; y < ysize; ++y) { |
401 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y); |
402 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y); |
403 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
404 | 0 | Store(Load(d, row_mf + x), d, row_hf + x); |
405 | 0 | } |
406 | 0 | } |
407 | 0 | Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i)); |
408 | 0 | static const double kRemoveMfRange = 0.29; |
409 | 0 | static const double kAddMfRange = 0.1; |
410 | 0 | if (i == 0) { |
411 | 0 | for (size_t y = 0; y < ysize; ++y) { |
412 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(0, y); |
413 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y); |
414 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
415 | 0 | auto mf = Load(d, row_mf + x); |
416 | 0 | auto hf = Sub(Load(d, row_hf + x), mf); |
417 | 0 | mf = RemoveRangeAroundZero(d, kRemoveMfRange, mf); |
418 | 0 | Store(mf, d, row_mf + x); |
419 | 0 | Store(hf, d, row_hf + x); |
420 | 0 | } |
421 | 0 | } |
422 | 0 | } else { |
423 | 0 | for (size_t y = 0; y < ysize; ++y) { |
424 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(1, y); |
425 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y); |
426 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
427 | 0 | auto mf = Load(d, row_mf + x); |
428 | 0 | auto hf = Sub(Load(d, row_hf + x), mf); |
429 | |
|
430 | 0 | mf = AmplifyRangeAroundZero(d, kAddMfRange, mf); |
431 | 0 | Store(mf, d, row_mf + x); |
432 | 0 | Store(hf, d, row_hf + x); |
433 | 0 | } |
434 | 0 | } |
435 | 0 | } |
436 | 0 | } |
437 | | |
438 | | // Temporarily used as output of SuppressXByY |
439 | 0 | ps.uhf[0] = ImageF(xsize, ysize); |
440 | 0 | ps.uhf[1] = ImageF(xsize, ysize); |
441 | | |
442 | | // Suppress red-green by intensity change in the high freq channels. |
443 | 0 | static const double suppress = 46.0; |
444 | 0 | SuppressXByY(ps.hf[0], ps.hf[1], suppress, &ps.uhf[0]); |
445 | | // hf is the SuppressXByY output, uhf will be written below. |
446 | 0 | ps.hf[0].Swap(ps.uhf[0]); |
447 | |
|
448 | 0 | for (int i = 0; i < 2; ++i) { |
449 | | // Divide hf into hf and uhf. |
450 | 0 | for (size_t y = 0; y < ysize; ++y) { |
451 | 0 | float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[i].Row(y); |
452 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y); |
453 | 0 | for (size_t x = 0; x < xsize; ++x) { |
454 | 0 | row_uhf[x] = row_hf[x]; |
455 | 0 | } |
456 | 0 | } |
457 | 0 | Blur(ps.hf[i], kSigmaUhf, params, blur_temp, &ps.hf[i]); |
458 | 0 | static const double kRemoveHfRange = 1.5; |
459 | 0 | static const double kAddHfRange = 0.132; |
460 | 0 | static const double kRemoveUhfRange = 0.04; |
461 | 0 | static const double kMaxclampHf = 28.4691806922; |
462 | 0 | static const double kMaxclampUhf = 5.19175294647; |
463 | 0 | static double kMulYHf = 2.155; |
464 | 0 | static double kMulYUhf = 2.69313763794; |
465 | 0 | if (i == 0) { |
466 | 0 | for (size_t y = 0; y < ysize; ++y) { |
467 | 0 | float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[0].Row(y); |
468 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y); |
469 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
470 | 0 | auto hf = Load(d, row_hf + x); |
471 | 0 | auto uhf = Sub(Load(d, row_uhf + x), hf); |
472 | 0 | hf = RemoveRangeAroundZero(d, kRemoveHfRange, hf); |
473 | 0 | uhf = RemoveRangeAroundZero(d, kRemoveUhfRange, uhf); |
474 | 0 | Store(hf, d, row_hf + x); |
475 | 0 | Store(uhf, d, row_uhf + x); |
476 | 0 | } |
477 | 0 | } |
478 | 0 | } else { |
479 | 0 | for (size_t y = 0; y < ysize; ++y) { |
480 | 0 | float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[1].Row(y); |
481 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y); |
482 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
483 | 0 | auto hf = Load(d, row_hf + x); |
484 | 0 | hf = MaximumClamp(d, hf, kMaxclampHf); |
485 | |
|
486 | 0 | auto uhf = Sub(Load(d, row_uhf + x), hf); |
487 | 0 | uhf = MaximumClamp(d, uhf, kMaxclampUhf); |
488 | 0 | uhf = Mul(uhf, Set(d, kMulYUhf)); |
489 | 0 | Store(uhf, d, row_uhf + x); |
490 | |
|
491 | 0 | hf = Mul(hf, Set(d, kMulYHf)); |
492 | 0 | hf = AmplifyRangeAroundZero(d, kAddHfRange, hf); |
493 | 0 | Store(hf, d, row_hf + x); |
494 | 0 | } |
495 | 0 | } |
496 | 0 | } |
497 | 0 | } |
498 | | // Modify range around zero code only concerns the high frequency |
499 | | // planes and only the X and Y channels. |
500 | | // Convert low freq xyb to vals space so that we can do a simple squared sum |
501 | | // diff on the low frequencies later. |
502 | 0 | for (size_t y = 0; y < ysize; ++y) { |
503 | 0 | float* BUTTERAUGLI_RESTRICT row_x = ps.lf.PlaneRow(0, y); |
504 | 0 | float* BUTTERAUGLI_RESTRICT row_y = ps.lf.PlaneRow(1, y); |
505 | 0 | float* BUTTERAUGLI_RESTRICT row_b = ps.lf.PlaneRow(2, y); |
506 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
507 | 0 | auto valx = Undefined(d); |
508 | 0 | auto valy = Undefined(d); |
509 | 0 | auto valb = Undefined(d); |
510 | 0 | XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x), |
511 | 0 | Load(d, row_b + x), &valx, &valy, &valb); |
512 | 0 | Store(valx, d, row_x + x); |
513 | 0 | Store(valy, d, row_y + x); |
514 | 0 | Store(valb, d, row_b + x); |
515 | 0 | } |
516 | 0 | } |
517 | 0 | } Unexecuted instantiation: butteraugli.cc:jxl::N_AVX3::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&) Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&) Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&) Unexecuted instantiation: butteraugli.cc:jxl::N_EMU128::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&) |
518 | | |
519 | | namespace { |
520 | | template <typename V> |
521 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d) { |
522 | 0 | return Add(Add(a, b), Add(c, d)); |
523 | 0 | } Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX3::Vec128<float, 1ul> jxl::N_AVX3::(anonymous namespace)::Sum<hwy::N_AVX3::Vec128<float, 1ul> >(hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX3::Vec512<float> jxl::N_AVX3::(anonymous namespace)::Sum<hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_EMU128::Vec128<float, 1ul> jxl::N_EMU128::(anonymous namespace)::Sum<hwy::N_EMU128::Vec128<float, 1ul> >(hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_EMU128::Vec128<float, 4ul> jxl::N_EMU128::(anonymous namespace)::Sum<hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>) |
524 | | template <typename V> |
525 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e) { |
526 | 0 | return Sum(a, b, c, Add(d, e)); |
527 | 0 | } Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX3::Vec128<float, 1ul> jxl::N_AVX3::(anonymous namespace)::Sum<hwy::N_AVX3::Vec128<float, 1ul> >(hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX3::Vec512<float> jxl::N_AVX3::(anonymous namespace)::Sum<hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_EMU128::Vec128<float, 1ul> jxl::N_EMU128::(anonymous namespace)::Sum<hwy::N_EMU128::Vec128<float, 1ul> >(hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_EMU128::Vec128<float, 4ul> jxl::N_EMU128::(anonymous namespace)::Sum<hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>) |
528 | | template <typename V> |
529 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g) { |
530 | 0 | return Sum(a, b, c, Sum(d, e, f, g)); |
531 | 0 | } Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX3::Vec128<float, 1ul> jxl::N_AVX3::(anonymous namespace)::Sum<hwy::N_AVX3::Vec128<float, 1ul> >(hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX3::Vec512<float> jxl::N_AVX3::(anonymous namespace)::Sum<hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_EMU128::Vec128<float, 1ul> jxl::N_EMU128::(anonymous namespace)::Sum<hwy::N_EMU128::Vec128<float, 1ul> >(hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_EMU128::Vec128<float, 4ul> jxl::N_EMU128::(anonymous namespace)::Sum<hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>) |
532 | | template <typename V> |
533 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g, V h, V i) { |
534 | 0 | return Add(Add(Sum(a, b, c, d), Sum(e, f, g, h)), i); |
535 | 0 | } Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX3::Vec128<float, 1ul> jxl::N_AVX3::(anonymous namespace)::Sum<hwy::N_AVX3::Vec128<float, 1ul> >(hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>, hwy::N_AVX3::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_AVX3::Vec512<float> jxl::N_AVX3::(anonymous namespace)::Sum<hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>, hwy::N_AVX3::Vec512<float>) Unexecuted instantiation: butteraugli.cc:hwy::N_EMU128::Vec128<float, 1ul> jxl::N_EMU128::(anonymous namespace)::Sum<hwy::N_EMU128::Vec128<float, 1ul> >(hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>, hwy::N_EMU128::Vec128<float, 1ul>) Unexecuted instantiation: butteraugli.cc:hwy::N_EMU128::Vec128<float, 4ul> jxl::N_EMU128::(anonymous namespace)::Sum<hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>, hwy::N_EMU128::Vec128<float, 4ul>) |
536 | | } // namespace |
537 | | |
538 | | template <class D> |
539 | | Vec<D> MaltaUnit(MaltaTagLF /*tag*/, const D df, |
540 | 0 | const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { |
541 | 0 | const intptr_t xs3 = 3 * xs; |
542 | |
|
543 | 0 | const auto center = LoadU(df, d); |
544 | | |
545 | | // x grows, y constant |
546 | 0 | const auto sum_yconst = Sum(LoadU(df, d - 4), LoadU(df, d - 2), center, |
547 | 0 | LoadU(df, d + 2), LoadU(df, d + 4)); |
548 | | // Will return this, sum of all line kernels |
549 | 0 | auto retval = Mul(sum_yconst, sum_yconst); |
550 | 0 | { |
551 | | // y grows, x constant |
552 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs - xs), center, |
553 | 0 | LoadU(df, d + xs + xs), LoadU(df, d + xs3 + xs)); |
554 | 0 | retval = MulAdd(sum, sum, retval); |
555 | 0 | } |
556 | 0 | { |
557 | | // both grow |
558 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), center, |
559 | 0 | LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); |
560 | 0 | retval = MulAdd(sum, sum, retval); |
561 | 0 | } |
562 | 0 | { |
563 | | // y grows, x shrinks |
564 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), center, |
565 | 0 | LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); |
566 | 0 | retval = MulAdd(sum, sum, retval); |
567 | 0 | } |
568 | 0 | { |
569 | | // y grows -4 to 4, x shrinks 1 -> -1 |
570 | 0 | auto sum = |
571 | 0 | Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs - xs + 1), center, |
572 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 1)); |
573 | 0 | retval = MulAdd(sum, sum, retval); |
574 | 0 | } |
575 | 0 | { |
576 | | // y grows -4 to 4, x grows -1 -> 1 |
577 | 0 | auto sum = |
578 | 0 | Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs - xs - 1), center, |
579 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 1)); |
580 | 0 | retval = MulAdd(sum, sum, retval); |
581 | 0 | } |
582 | 0 | { |
583 | | // x grows -4 to 4, y grows -1 to 1 |
584 | 0 | auto sum = Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 2 - xs), center, |
585 | 0 | LoadU(df, d + 2 + xs), LoadU(df, d + 4 + xs)); |
586 | 0 | retval = MulAdd(sum, sum, retval); |
587 | 0 | } |
588 | 0 | { |
589 | | // x grows -4 to 4, y shrinks 1 to -1 |
590 | 0 | auto sum = Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 2 + xs), center, |
591 | 0 | LoadU(df, d + 2 - xs), LoadU(df, d + 4 - xs)); |
592 | 0 | retval = MulAdd(sum, sum, retval); |
593 | 0 | } |
594 | 0 | { |
595 | | /* 0_________ |
596 | | 1__*______ |
597 | | 2___*_____ |
598 | | 3_________ |
599 | | 4____0____ |
600 | | 5_________ |
601 | | 6_____*___ |
602 | | 7______*__ |
603 | | 8_________ */ |
604 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), center, |
605 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); |
606 | 0 | retval = MulAdd(sum, sum, retval); |
607 | 0 | } |
608 | 0 | { |
609 | | /* 0_________ |
610 | | 1______*__ |
611 | | 2_____*___ |
612 | | 3_________ |
613 | | 4____0____ |
614 | | 5_________ |
615 | | 6___*_____ |
616 | | 7__*______ |
617 | | 8_________ */ |
618 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), center, |
619 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); |
620 | 0 | retval = MulAdd(sum, sum, retval); |
621 | 0 | } |
622 | 0 | { |
623 | | /* 0_________ |
624 | | 1_________ |
625 | | 2_*_______ |
626 | | 3__*______ |
627 | | 4____0____ |
628 | | 5______*__ |
629 | | 6_______*_ |
630 | | 7_________ |
631 | | 8_________ */ |
632 | 0 | auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), center, |
633 | 0 | LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); |
634 | 0 | retval = MulAdd(sum, sum, retval); |
635 | 0 | } |
636 | 0 | { |
637 | | /* 0_________ |
638 | | 1_________ |
639 | | 2_______*_ |
640 | | 3______*__ |
641 | | 4____0____ |
642 | | 5__*______ |
643 | | 6_*_______ |
644 | | 7_________ |
645 | | 8_________ */ |
646 | 0 | auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), center, |
647 | 0 | LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); |
648 | 0 | retval = MulAdd(sum, sum, retval); |
649 | 0 | } |
650 | 0 | { |
651 | | /* 0_________ |
652 | | 1_________ |
653 | | 2________* |
654 | | 3______*__ |
655 | | 4____0____ |
656 | | 5__*______ |
657 | | 6*________ |
658 | | 7_________ |
659 | | 8_________ */ |
660 | |
|
661 | 0 | auto sum = Sum(LoadU(df, d + xs + xs - 4), LoadU(df, d + xs - 2), center, |
662 | 0 | LoadU(df, d - xs + 2), LoadU(df, d - xs - xs + 4)); |
663 | 0 | retval = MulAdd(sum, sum, retval); |
664 | 0 | } |
665 | 0 | { |
666 | | /* 0_________ |
667 | | 1_________ |
668 | | 2*________ |
669 | | 3__*______ |
670 | | 4____0____ |
671 | | 5______*__ |
672 | | 6________* |
673 | | 7_________ |
674 | | 8_________ */ |
675 | 0 | auto sum = Sum(LoadU(df, d - xs - xs - 4), LoadU(df, d - xs - 2), center, |
676 | 0 | LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 4)); |
677 | 0 | retval = MulAdd(sum, sum, retval); |
678 | 0 | } |
679 | 0 | { |
680 | | /* 0__*______ |
681 | | 1_________ |
682 | | 2___*_____ |
683 | | 3_________ |
684 | | 4____0____ |
685 | | 5_________ |
686 | | 6_____*___ |
687 | | 7_________ |
688 | | 8______*__ */ |
689 | 0 | auto sum = |
690 | 0 | Sum(LoadU(df, d - xs3 - xs - 2), LoadU(df, d - xs - xs - 1), center, |
691 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 2)); |
692 | 0 | retval = MulAdd(sum, sum, retval); |
693 | 0 | } |
694 | 0 | { |
695 | | /* 0______*__ |
696 | | 1_________ |
697 | | 2_____*___ |
698 | | 3_________ |
699 | | 4____0____ |
700 | | 5_________ |
701 | | 6___*_____ |
702 | | 7_________ |
703 | | 8__*______ */ |
704 | 0 | auto sum = |
705 | 0 | Sum(LoadU(df, d - xs3 - xs + 2), LoadU(df, d - xs - xs + 1), center, |
706 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 2)); |
707 | 0 | retval = MulAdd(sum, sum, retval); |
708 | 0 | } |
709 | 0 | return retval; |
710 | 0 | } Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE4::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 4ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE4::Simd<float, 4ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_AVX2::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 8ul, 0> >(jxl::MaltaTagLF, hwy::N_AVX2::Simd<float, 8ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX3::Simd<float, 1ul, 0>)())) jxl::N_AVX3::MaltaUnit<hwy::N_AVX3::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_AVX3::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX3::Simd<float, 16ul, 0>)())) jxl::N_AVX3::MaltaUnit<hwy::N_AVX3::Simd<float, 16ul, 0> >(jxl::MaltaTagLF, hwy::N_AVX3::Simd<float, 16ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_EMU128::Simd<float, 1ul, 0>)())) jxl::N_EMU128::MaltaUnit<hwy::N_EMU128::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_EMU128::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_EMU128::Simd<float, 4ul, 0>)())) jxl::N_EMU128::MaltaUnit<hwy::N_EMU128::Simd<float, 4ul, 0> >(jxl::MaltaTagLF, hwy::N_EMU128::Simd<float, 4ul, 0>, float const*, long) |
711 | | |
712 | | template <class D> |
713 | | Vec<D> MaltaUnit(MaltaTag /*tag*/, const D df, |
714 | 0 | const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { |
715 | 0 | const intptr_t xs3 = 3 * xs; |
716 | |
|
717 | 0 | const auto center = LoadU(df, d); |
718 | | |
719 | | // x grows, y constant |
720 | 0 | const auto sum_yconst = |
721 | 0 | Sum(LoadU(df, d - 4), LoadU(df, d - 3), LoadU(df, d - 2), |
722 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2), |
723 | 0 | LoadU(df, d + 3), LoadU(df, d + 4)); |
724 | | // Will return this, sum of all line kernels |
725 | 0 | auto retval = Mul(sum_yconst, sum_yconst); |
726 | |
|
727 | 0 | { |
728 | | // y grows, x constant |
729 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs3), |
730 | 0 | LoadU(df, d - xs - xs), LoadU(df, d - xs), center, |
731 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs), |
732 | 0 | LoadU(df, d + xs3), LoadU(df, d + xs3 + xs)); |
733 | 0 | retval = MulAdd(sum, sum, retval); |
734 | 0 | } |
735 | 0 | { |
736 | | // both grow |
737 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), |
738 | 0 | LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), |
739 | 0 | LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); |
740 | 0 | retval = MulAdd(sum, sum, retval); |
741 | 0 | } |
742 | 0 | { |
743 | | // y grows, x shrinks |
744 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), |
745 | 0 | LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), |
746 | 0 | LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); |
747 | 0 | retval = MulAdd(sum, sum, retval); |
748 | 0 | } |
749 | 0 | { |
750 | | // y grows -4 to 4, x shrinks 1 -> -1 |
751 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), |
752 | 0 | LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, |
753 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), |
754 | 0 | LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); |
755 | 0 | retval = MulAdd(sum, sum, retval); |
756 | 0 | } |
757 | 0 | { |
758 | | // y grows -4 to 4, x grows -1 -> 1 |
759 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), |
760 | 0 | LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, |
761 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), |
762 | 0 | LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); |
763 | 0 | retval = MulAdd(sum, sum, retval); |
764 | 0 | } |
765 | 0 | { |
766 | | // x grows -4 to 4, y grows -1 to 1 |
767 | 0 | auto sum = |
768 | 0 | Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 3 - xs), LoadU(df, d - 2 - xs), |
769 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 + xs), |
770 | 0 | LoadU(df, d + 3 + xs), LoadU(df, d + 4 + xs)); |
771 | 0 | retval = MulAdd(sum, sum, retval); |
772 | 0 | } |
773 | 0 | { |
774 | | // x grows -4 to 4, y shrinks 1 to -1 |
775 | 0 | auto sum = |
776 | 0 | Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 3 + xs), LoadU(df, d - 2 + xs), |
777 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 - xs), |
778 | 0 | LoadU(df, d + 3 - xs), LoadU(df, d + 4 - xs)); |
779 | 0 | retval = MulAdd(sum, sum, retval); |
780 | 0 | } |
781 | 0 | { |
782 | | /* 0_________ |
783 | | 1__*______ |
784 | | 2___*_____ |
785 | | 3___*_____ |
786 | | 4____0____ |
787 | | 5_____*___ |
788 | | 6_____*___ |
789 | | 7______*__ |
790 | | 8_________ */ |
791 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), |
792 | 0 | LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), |
793 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); |
794 | 0 | retval = MulAdd(sum, sum, retval); |
795 | 0 | } |
796 | 0 | { |
797 | | /* 0_________ |
798 | | 1______*__ |
799 | | 2_____*___ |
800 | | 3_____*___ |
801 | | 4____0____ |
802 | | 5___*_____ |
803 | | 6___*_____ |
804 | | 7__*______ |
805 | | 8_________ */ |
806 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), |
807 | 0 | LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), |
808 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); |
809 | 0 | retval = MulAdd(sum, sum, retval); |
810 | 0 | } |
811 | 0 | { |
812 | | /* 0_________ |
813 | | 1_________ |
814 | | 2_*_______ |
815 | | 3__**_____ |
816 | | 4____0____ |
817 | | 5_____**__ |
818 | | 6_______*_ |
819 | | 7_________ |
820 | | 8_________ */ |
821 | 0 | auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), |
822 | 0 | LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), |
823 | 0 | LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); |
824 | 0 | retval = MulAdd(sum, sum, retval); |
825 | 0 | } |
826 | 0 | { |
827 | | /* 0_________ |
828 | | 1_________ |
829 | | 2_______*_ |
830 | | 3_____**__ |
831 | | 4____0____ |
832 | | 5__**_____ |
833 | | 6_*_______ |
834 | | 7_________ |
835 | | 8_________ */ |
836 | 0 | auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), |
837 | 0 | LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), |
838 | 0 | LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); |
839 | 0 | retval = MulAdd(sum, sum, retval); |
840 | 0 | } |
841 | 0 | { |
842 | | /* 0_________ |
843 | | 1_________ |
844 | | 2_________ |
845 | | 3______*** |
846 | | 4___*0*___ |
847 | | 5***______ |
848 | | 6_________ |
849 | | 7_________ |
850 | | 8_________ */ |
851 | |
|
852 | 0 | auto sum = |
853 | 0 | Sum(LoadU(df, d + xs - 4), LoadU(df, d + xs - 3), LoadU(df, d + xs - 2), |
854 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d - xs + 2), |
855 | 0 | LoadU(df, d - xs + 3), LoadU(df, d - xs + 4)); |
856 | 0 | retval = MulAdd(sum, sum, retval); |
857 | 0 | } |
858 | 0 | { |
859 | | /* 0_________ |
860 | | 1_________ |
861 | | 2_________ |
862 | | 3***______ |
863 | | 4___*0*___ |
864 | | 5______*** |
865 | | 6_________ |
866 | | 7_________ |
867 | | 8_________ */ |
868 | 0 | auto sum = |
869 | 0 | Sum(LoadU(df, d - xs - 4), LoadU(df, d - xs - 3), LoadU(df, d - xs - 2), |
870 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + xs + 2), |
871 | 0 | LoadU(df, d + xs + 3), LoadU(df, d + xs + 4)); |
872 | 0 | retval = MulAdd(sum, sum, retval); |
873 | 0 | } |
874 | 0 | { |
875 | | /* 0___*_____ |
876 | | 1___*_____ |
877 | | 2___*_____ |
878 | | 3____*____ |
879 | | 4____0____ |
880 | | 5____*____ |
881 | | 6_____*___ |
882 | | 7_____*___ |
883 | | 8_____*___ */ |
884 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), |
885 | 0 | LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, |
886 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), |
887 | 0 | LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); |
888 | 0 | retval = MulAdd(sum, sum, retval); |
889 | 0 | } |
890 | 0 | { |
891 | | /* 0_____*___ |
892 | | 1_____*___ |
893 | | 2____ *___ |
894 | | 3____*____ |
895 | | 4____0____ |
896 | | 5____*____ |
897 | | 6___*_____ |
898 | | 7___*_____ |
899 | | 8___*_____ */ |
900 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), |
901 | 0 | LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, |
902 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), |
903 | 0 | LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); |
904 | 0 | retval = MulAdd(sum, sum, retval); |
905 | 0 | } |
906 | 0 | return retval; |
907 | 0 | } Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_SSE4::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 4ul, 0> >(jxl::MaltaTag, hwy::N_SSE4::Simd<float, 4ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_AVX2::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 8ul, 0> >(jxl::MaltaTag, hwy::N_AVX2::Simd<float, 8ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX3::Simd<float, 1ul, 0>)())) jxl::N_AVX3::MaltaUnit<hwy::N_AVX3::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_AVX3::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_AVX3::Simd<float, 16ul, 0>)())) jxl::N_AVX3::MaltaUnit<hwy::N_AVX3::Simd<float, 16ul, 0> >(jxl::MaltaTag, hwy::N_AVX3::Simd<float, 16ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_EMU128::Simd<float, 1ul, 0>)())) jxl::N_EMU128::MaltaUnit<hwy::N_EMU128::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_EMU128::Simd<float, 1ul, 0>, float const*, long) Unexecuted instantiation: decltype (Zero((hwy::N_EMU128::Simd<float, 4ul, 0>)())) jxl::N_EMU128::MaltaUnit<hwy::N_EMU128::Simd<float, 4ul, 0> >(jxl::MaltaTag, hwy::N_EMU128::Simd<float, 4ul, 0>, float const*, long) |
908 | | |
909 | | // Returns MaltaUnit. Avoids bounds-checks when x0 and y0 are known |
910 | | // to be far enough from the image borders. "diffs" is a packed image. |
911 | | template <class Tag> |
912 | | static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs, |
913 | | const size_t x0, |
914 | 0 | const size_t y0) { |
915 | 0 | const float* BUTTERAUGLI_RESTRICT d = diffs.ConstRow(y0) + x0; |
916 | 0 | const HWY_CAPPED(float, 1) df; |
917 | 0 | if ((x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) && |
918 | 0 | y0 < (diffs.ysize() - 4))) { |
919 | 0 | return GetLane(MaltaUnit(Tag(), df, d, diffs.PixelsPerRow())); |
920 | 0 | } |
921 | | |
922 | 0 | PROFILER_ZONE("Padded Malta"); |
923 | 0 | float borderimage[12 * 9]; // round up to 4 |
924 | 0 | for (int dy = 0; dy < 9; ++dy) { |
925 | 0 | int y = y0 + dy - 4; |
926 | 0 | if (y < 0 || static_cast<size_t>(y) >= diffs.ysize()) { |
927 | 0 | for (int dx = 0; dx < 12; ++dx) { |
928 | 0 | borderimage[dy * 12 + dx] = 0.0f; |
929 | 0 | } |
930 | 0 | continue; |
931 | 0 | } |
932 | | |
933 | 0 | const float* row_diffs = diffs.ConstRow(y); |
934 | 0 | for (int dx = 0; dx < 9; ++dx) { |
935 | 0 | int x = x0 + dx - 4; |
936 | 0 | if (x < 0 || static_cast<size_t>(x) >= diffs.xsize()) { |
937 | 0 | borderimage[dy * 12 + dx] = 0.0f; |
938 | 0 | } else { |
939 | 0 | borderimage[dy * 12 + dx] = row_diffs[x]; |
940 | 0 | } |
941 | 0 | } |
942 | 0 | std::fill(borderimage + dy * 12 + 9, borderimage + dy * 12 + 12, 0.0f); |
943 | 0 | } |
944 | 0 | return GetLane(MaltaUnit(Tag(), df, &borderimage[4 * 12 + 4], 12)); |
945 | 0 | } Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE4::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE4::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_AVX2::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_AVX2::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_AVX3::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_AVX3::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_EMU128::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_EMU128::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long) |
946 | | |
947 | | template <class Tag> |
948 | | static void MaltaDiffMapT(const Tag tag, const ImageF& lum0, const ImageF& lum1, |
949 | | const double w_0gt1, const double w_0lt1, |
950 | | const double norm1, const double len, |
951 | | const double mulli, ImageF* HWY_RESTRICT diffs, |
952 | 0 | Image3F* HWY_RESTRICT block_diff_ac, size_t c) { |
953 | 0 | JXL_DASSERT(SameSize(lum0, lum1) && SameSize(lum0, *diffs)); |
954 | 0 | const size_t xsize_ = lum0.xsize(); |
955 | 0 | const size_t ysize_ = lum0.ysize(); |
956 | |
|
957 | 0 | const float kWeight0 = 0.5; |
958 | 0 | const float kWeight1 = 0.33; |
959 | |
|
960 | 0 | const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1); |
961 | 0 | const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1); |
962 | 0 | const float norm2_0gt1 = w_pre0gt1 * norm1; |
963 | 0 | const float norm2_0lt1 = w_pre0lt1 * norm1; |
964 | |
|
965 | 0 | for (size_t y = 0; y < ysize_; ++y) { |
966 | 0 | const float* HWY_RESTRICT row0 = lum0.ConstRow(y); |
967 | 0 | const float* HWY_RESTRICT row1 = lum1.ConstRow(y); |
968 | 0 | float* HWY_RESTRICT row_diffs = diffs->Row(y); |
969 | 0 | for (size_t x = 0; x < xsize_; ++x) { |
970 | 0 | const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x])); |
971 | 0 | const float diff = row0[x] - row1[x]; |
972 | 0 | const float scaler = norm2_0gt1 / (static_cast<float>(norm1) + absval); |
973 | | |
974 | | // Primary symmetric quadratic objective. |
975 | 0 | row_diffs[x] = scaler * diff; |
976 | |
|
977 | 0 | const float scaler2 = norm2_0lt1 / (static_cast<float>(norm1) + absval); |
978 | 0 | const double fabs0 = std::fabs(row0[x]); |
979 | | |
980 | | // Secondary half-open quadratic objectives. |
981 | 0 | const double too_small = 0.55 * fabs0; |
982 | 0 | const double too_big = 1.05 * fabs0; |
983 | |
|
984 | 0 | if (row0[x] < 0) { |
985 | 0 | if (row1[x] > -too_small) { |
986 | 0 | double impact = scaler2 * (row1[x] + too_small); |
987 | 0 | row_diffs[x] -= impact; |
988 | 0 | } else if (row1[x] < -too_big) { |
989 | 0 | double impact = scaler2 * (-row1[x] - too_big); |
990 | 0 | row_diffs[x] += impact; |
991 | 0 | } |
992 | 0 | } else { |
993 | 0 | if (row1[x] < too_small) { |
994 | 0 | double impact = scaler2 * (too_small - row1[x]); |
995 | 0 | row_diffs[x] += impact; |
996 | 0 | } else if (row1[x] > too_big) { |
997 | 0 | double impact = scaler2 * (row1[x] - too_big); |
998 | 0 | row_diffs[x] -= impact; |
999 | 0 | } |
1000 | 0 | } |
1001 | 0 | } |
1002 | 0 | } |
1003 | |
|
1004 | 0 | size_t y0 = 0; |
1005 | | // Top |
1006 | 0 | for (; y0 < 4; ++y0) { |
1007 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0); |
1008 | 0 | for (size_t x0 = 0; x0 < xsize_; ++x0) { |
1009 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1010 | 0 | } |
1011 | 0 | } |
1012 | |
|
1013 | 0 | const HWY_FULL(float) df; |
1014 | 0 | const size_t aligned_x = std::max(size_t(4), Lanes(df)); |
1015 | 0 | const intptr_t stride = diffs->PixelsPerRow(); |
1016 | | |
1017 | | // Middle |
1018 | 0 | for (; y0 < ysize_ - 4; ++y0) { |
1019 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y0); |
1020 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0); |
1021 | 0 | size_t x0 = 0; |
1022 | 0 | for (; x0 < aligned_x; ++x0) { |
1023 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1024 | 0 | } |
1025 | 0 | for (; x0 + Lanes(df) + 4 <= xsize_; x0 += Lanes(df)) { |
1026 | 0 | auto diff = Load(df, row_diff + x0); |
1027 | 0 | diff = Add(diff, MaltaUnit(Tag(), df, row_in + x0, stride)); |
1028 | 0 | Store(diff, df, row_diff + x0); |
1029 | 0 | } |
1030 | |
|
1031 | 0 | for (; x0 < xsize_; ++x0) { |
1032 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1033 | 0 | } |
1034 | 0 | } |
1035 | | |
1036 | | // Bottom |
1037 | 0 | for (; y0 < ysize_; ++y0) { |
1038 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0); |
1039 | 0 | for (size_t x0 = 0; x0 < xsize_; ++x0) { |
1040 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1041 | 0 | } |
1042 | 0 | } |
1043 | 0 | } Unexecuted instantiation: butteraugli.cc:void jxl::N_SSE4::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:void jxl::N_SSE4::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:void jxl::N_AVX2::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:void jxl::N_AVX2::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:void jxl::N_AVX3::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:void jxl::N_AVX3::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:void jxl::N_EMU128::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:void jxl::N_EMU128::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) |
1044 | | |
1045 | | // Need non-template wrapper functions for HWY_EXPORT. |
1046 | | void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, |
1047 | | const double w_0lt1, const double norm1, const double len, |
1048 | | const double mulli, ImageF* HWY_RESTRICT diffs, |
1049 | 0 | Image3F* HWY_RESTRICT block_diff_ac, size_t c) { |
1050 | 0 | MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, |
1051 | 0 | diffs, block_diff_ac, c); |
1052 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: jxl::N_AVX2::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: jxl::N_AVX3::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: jxl::N_EMU128::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) |
1053 | | |
1054 | | void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, |
1055 | | const double w_0lt1, const double norm1, const double len, |
1056 | | const double mulli, ImageF* HWY_RESTRICT diffs, |
1057 | 0 | Image3F* HWY_RESTRICT block_diff_ac, size_t c) { |
1058 | 0 | MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, |
1059 | 0 | diffs, block_diff_ac, c); |
1060 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: jxl::N_AVX2::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: jxl::N_AVX3::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: jxl::N_EMU128::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Image3<float>*, unsigned long) |
1061 | | |
1062 | 0 | void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) { |
1063 | 0 | PROFILER_FUNC; |
1064 | 0 | const size_t xsize = xyb.xsize(); |
1065 | 0 | const size_t ysize = xyb.ysize(); |
1066 | 0 | const float bias = mul * bias_arg; |
1067 | 0 | const float sqrt_bias = sqrt(bias); |
1068 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1069 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = xyb.Row(y); |
1070 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(y); |
1071 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1072 | | // kBias makes sqrt behave more linearly. |
1073 | 0 | row_out[x] = sqrt(mul * std::abs(row_in[x]) + bias) - sqrt_bias; |
1074 | 0 | } |
1075 | 0 | } |
1076 | 0 | } Unexecuted instantiation: jxl::N_SSE4::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX3::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_EMU128::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*) |
1077 | | |
1078 | | // std::log(80.0) / std::log(255.0); |
1079 | | constexpr float kIntensityTargetNormalizationHack = 0.79079917404f; |
1080 | | static const float kInternalGoodQualityThreshold = |
1081 | | 17.8f * kIntensityTargetNormalizationHack; |
1082 | | static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold; |
1083 | | |
1084 | 0 | void StoreMin3(const float v, float& min0, float& min1, float& min2) { |
1085 | 0 | if (v < min2) { |
1086 | 0 | if (v < min0) { |
1087 | 0 | min2 = min1; |
1088 | 0 | min1 = min0; |
1089 | 0 | min0 = v; |
1090 | 0 | } else if (v < min1) { |
1091 | 0 | min2 = min1; |
1092 | 0 | min1 = v; |
1093 | 0 | } else { |
1094 | 0 | min2 = v; |
1095 | 0 | } |
1096 | 0 | } |
1097 | 0 | } Unexecuted instantiation: jxl::N_SSE4::StoreMin3(float, float&, float&, float&) Unexecuted instantiation: jxl::N_AVX2::StoreMin3(float, float&, float&, float&) Unexecuted instantiation: jxl::N_AVX3::StoreMin3(float, float&, float&, float&) Unexecuted instantiation: jxl::N_EMU128::StoreMin3(float, float&, float&, float&) |
1098 | | |
1099 | | // Look for smooth areas near the area of degradation. |
1100 | | // If the areas area generally smooth, don't do masking. |
1101 | 0 | void FuzzyErosion(const ImageF& from, ImageF* to) { |
1102 | 0 | const size_t xsize = from.xsize(); |
1103 | 0 | const size_t ysize = from.ysize(); |
1104 | 0 | static const int kStep = 3; |
1105 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1106 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1107 | 0 | float min0 = from.Row(y)[x]; |
1108 | 0 | float min1 = 2 * min0; |
1109 | 0 | float min2 = min1; |
1110 | 0 | if (x >= kStep) { |
1111 | 0 | float v = from.Row(y)[x - kStep]; |
1112 | 0 | StoreMin3(v, min0, min1, min2); |
1113 | 0 | if (y >= kStep) { |
1114 | 0 | float v = from.Row(y - kStep)[x - kStep]; |
1115 | 0 | StoreMin3(v, min0, min1, min2); |
1116 | 0 | } |
1117 | 0 | if (y < ysize - kStep) { |
1118 | 0 | float v = from.Row(y + kStep)[x - kStep]; |
1119 | 0 | StoreMin3(v, min0, min1, min2); |
1120 | 0 | } |
1121 | 0 | } |
1122 | 0 | if (x < xsize - kStep) { |
1123 | 0 | float v = from.Row(y)[x + kStep]; |
1124 | 0 | StoreMin3(v, min0, min1, min2); |
1125 | 0 | if (y >= kStep) { |
1126 | 0 | float v = from.Row(y - kStep)[x + kStep]; |
1127 | 0 | StoreMin3(v, min0, min1, min2); |
1128 | 0 | } |
1129 | 0 | if (y < ysize - kStep) { |
1130 | 0 | float v = from.Row(y + kStep)[x + kStep]; |
1131 | 0 | StoreMin3(v, min0, min1, min2); |
1132 | 0 | } |
1133 | 0 | } |
1134 | 0 | if (y >= kStep) { |
1135 | 0 | float v = from.Row(y - kStep)[x]; |
1136 | 0 | StoreMin3(v, min0, min1, min2); |
1137 | 0 | } |
1138 | 0 | if (y < ysize - kStep) { |
1139 | 0 | float v = from.Row(y + kStep)[x]; |
1140 | 0 | StoreMin3(v, min0, min1, min2); |
1141 | 0 | } |
1142 | 0 | to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2); |
1143 | 0 | } |
1144 | 0 | } |
1145 | 0 | } Unexecuted instantiation: jxl::N_SSE4::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX3::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_EMU128::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*) |
1146 | | |
1147 | | // Compute values of local frequency and dc masking based on the activity |
1148 | | // in the two images. img_diff_ac may be null. |
1149 | | void Mask(const ImageF& mask0, const ImageF& mask1, |
1150 | | const ButteraugliParams& params, BlurTemp* blur_temp, |
1151 | | ImageF* BUTTERAUGLI_RESTRICT mask, |
1152 | 0 | ImageF* BUTTERAUGLI_RESTRICT diff_ac) { |
1153 | | // Only X and Y components are involved in masking. B's influence |
1154 | | // is considered less important in the high frequency area, and we |
1155 | | // don't model masking from lower frequency signals. |
1156 | 0 | PROFILER_FUNC; |
1157 | 0 | const size_t xsize = mask0.xsize(); |
1158 | 0 | const size_t ysize = mask0.ysize(); |
1159 | 0 | *mask = ImageF(xsize, ysize); |
1160 | 0 | static const float kMul = 6.19424080439; |
1161 | 0 | static const float kBias = 12.61050594197; |
1162 | 0 | static const float kRadius = 2.7; |
1163 | 0 | ImageF diff0(xsize, ysize); |
1164 | 0 | ImageF diff1(xsize, ysize); |
1165 | 0 | ImageF blurred0(xsize, ysize); |
1166 | 0 | ImageF blurred1(xsize, ysize); |
1167 | 0 | DiffPrecompute(mask0, kMul, kBias, &diff0); |
1168 | 0 | DiffPrecompute(mask1, kMul, kBias, &diff1); |
1169 | 0 | Blur(diff0, kRadius, params, blur_temp, &blurred0); |
1170 | 0 | FuzzyErosion(blurred0, &diff0); |
1171 | 0 | Blur(diff1, kRadius, params, blur_temp, &blurred1); |
1172 | 0 | FuzzyErosion(blurred1, &diff1); |
1173 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1174 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1175 | 0 | mask->Row(y)[x] = diff0.Row(y)[x]; |
1176 | 0 | if (diff_ac != nullptr) { |
1177 | 0 | static const float kMaskToErrorMul = 10.0; |
1178 | 0 | float diff = blurred0.Row(y)[x] - blurred1.Row(y)[x]; |
1179 | 0 | diff_ac->Row(y)[x] += kMaskToErrorMul * diff * diff; |
1180 | 0 | } |
1181 | 0 | } |
1182 | 0 | } |
1183 | 0 | } Unexecuted instantiation: jxl::N_SSE4::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX3::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_EMU128::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) |
1184 | | |
1185 | | // `diff_ac` may be null. |
1186 | | void MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1, |
1187 | | const size_t xsize, const size_t ysize, |
1188 | | const ButteraugliParams& params, Image3F* temp, |
1189 | | BlurTemp* blur_temp, ImageF* BUTTERAUGLI_RESTRICT mask, |
1190 | 0 | ImageF* BUTTERAUGLI_RESTRICT diff_ac) { |
1191 | 0 | ImageF mask0(xsize, ysize); |
1192 | 0 | ImageF mask1(xsize, ysize); |
1193 | 0 | static const float muls[3] = { |
1194 | 0 | 2.5f, |
1195 | 0 | 0.4f, |
1196 | 0 | 0.4f, |
1197 | 0 | }; |
1198 | | // Silly and unoptimized approach here. TODO(jyrki): rework this. |
1199 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1200 | 0 | const float* BUTTERAUGLI_RESTRICT row_y_hf0 = pi0.hf[1].Row(y); |
1201 | 0 | const float* BUTTERAUGLI_RESTRICT row_y_hf1 = pi1.hf[1].Row(y); |
1202 | 0 | const float* BUTTERAUGLI_RESTRICT row_y_uhf0 = pi0.uhf[1].Row(y); |
1203 | 0 | const float* BUTTERAUGLI_RESTRICT row_y_uhf1 = pi1.uhf[1].Row(y); |
1204 | 0 | const float* BUTTERAUGLI_RESTRICT row_x_hf0 = pi0.hf[0].Row(y); |
1205 | 0 | const float* BUTTERAUGLI_RESTRICT row_x_hf1 = pi1.hf[0].Row(y); |
1206 | 0 | const float* BUTTERAUGLI_RESTRICT row_x_uhf0 = pi0.uhf[0].Row(y); |
1207 | 0 | const float* BUTTERAUGLI_RESTRICT row_x_uhf1 = pi1.uhf[0].Row(y); |
1208 | 0 | float* BUTTERAUGLI_RESTRICT row0 = mask0.Row(y); |
1209 | 0 | float* BUTTERAUGLI_RESTRICT row1 = mask1.Row(y); |
1210 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1211 | 0 | float xdiff0 = (row_x_uhf0[x] + row_x_hf0[x]) * muls[0]; |
1212 | 0 | float xdiff1 = (row_x_uhf1[x] + row_x_hf1[x]) * muls[0]; |
1213 | 0 | float ydiff0 = row_y_uhf0[x] * muls[1] + row_y_hf0[x] * muls[2]; |
1214 | 0 | float ydiff1 = row_y_uhf1[x] * muls[1] + row_y_hf1[x] * muls[2]; |
1215 | 0 | row0[x] = xdiff0 * xdiff0 + ydiff0 * ydiff0; |
1216 | 0 | row0[x] = sqrt(row0[x]); |
1217 | 0 | row1[x] = xdiff1 * xdiff1 + ydiff1 * ydiff1; |
1218 | 0 | row1[x] = sqrt(row1[x]); |
1219 | 0 | } |
1220 | 0 | } |
1221 | 0 | Mask(mask0, mask1, params, blur_temp, mask, diff_ac); |
1222 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX3::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_EMU128::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*) |
1223 | | |
1224 | 0 | double MaskY(double delta) { |
1225 | 0 | static const double offset = 0.829591754942; |
1226 | 0 | static const double scaler = 0.451936922203; |
1227 | 0 | static const double mul = 2.5485944793; |
1228 | 0 | const double c = mul / ((scaler * delta) + offset); |
1229 | 0 | const double retval = kGlobalScale * (1.0 + c); |
1230 | 0 | return retval * retval; |
1231 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaskY(double) Unexecuted instantiation: jxl::N_AVX2::MaskY(double) Unexecuted instantiation: jxl::N_AVX3::MaskY(double) Unexecuted instantiation: jxl::N_EMU128::MaskY(double) |
1232 | | |
1233 | 0 | double MaskDcY(double delta) { |
1234 | 0 | static const double offset = 0.20025578522; |
1235 | 0 | static const double scaler = 3.87449418804; |
1236 | 0 | static const double mul = 0.505054525019; |
1237 | 0 | const double c = mul / ((scaler * delta) + offset); |
1238 | 0 | const double retval = kGlobalScale * (1.0 + c); |
1239 | 0 | return retval * retval; |
1240 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaskDcY(double) Unexecuted instantiation: jxl::N_AVX2::MaskDcY(double) Unexecuted instantiation: jxl::N_AVX3::MaskDcY(double) Unexecuted instantiation: jxl::N_EMU128::MaskDcY(double) |
1241 | | |
1242 | 0 | inline float MaskColor(const float color[3], const float mask) { |
1243 | 0 | return color[0] * mask + color[1] * mask + color[2] * mask; |
1244 | 0 | } Unexecuted instantiation: jxl::N_SSE4::MaskColor(float const*, float) Unexecuted instantiation: jxl::N_AVX2::MaskColor(float const*, float) Unexecuted instantiation: jxl::N_AVX3::MaskColor(float const*, float) Unexecuted instantiation: jxl::N_EMU128::MaskColor(float const*, float) |
1245 | | |
1246 | | // Diffmap := sqrt of sum{diff images by multiplied by X and Y/B masks} |
1247 | | void CombineChannelsToDiffmap(const ImageF& mask, const Image3F& block_diff_dc, |
1248 | | const Image3F& block_diff_ac, float xmul, |
1249 | 0 | ImageF* result) { |
1250 | 0 | PROFILER_FUNC; |
1251 | 0 | JXL_CHECK(SameSize(mask, *result)); |
1252 | 0 | size_t xsize = mask.xsize(); |
1253 | 0 | size_t ysize = mask.ysize(); |
1254 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1255 | 0 | float* BUTTERAUGLI_RESTRICT row_out = result->Row(y); |
1256 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1257 | 0 | float val = mask.Row(y)[x]; |
1258 | 0 | float maskval = MaskY(val); |
1259 | 0 | float dc_maskval = MaskDcY(val); |
1260 | 0 | float diff_dc[3]; |
1261 | 0 | float diff_ac[3]; |
1262 | 0 | for (int i = 0; i < 3; ++i) { |
1263 | 0 | diff_dc[i] = block_diff_dc.PlaneRow(i, y)[x]; |
1264 | 0 | diff_ac[i] = block_diff_ac.PlaneRow(i, y)[x]; |
1265 | 0 | } |
1266 | 0 | diff_ac[0] *= xmul; |
1267 | 0 | diff_dc[0] *= xmul; |
1268 | 0 | row_out[x] = |
1269 | 0 | sqrt(MaskColor(diff_dc, dc_maskval) + MaskColor(diff_ac, maskval)); |
1270 | 0 | } |
1271 | 0 | } |
1272 | 0 | } Unexecuted instantiation: jxl::N_SSE4::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX2::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_AVX3::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*) Unexecuted instantiation: jxl::N_EMU128::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*) |
1273 | | |
1274 | | // Adds weighted L2 difference between i0 and i1 to diffmap. |
1275 | | static void L2Diff(const ImageF& i0, const ImageF& i1, const float w, |
1276 | 0 | Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) { |
1277 | 0 | if (w == 0) return; |
1278 | | |
1279 | 0 | const HWY_FULL(float) d; |
1280 | 0 | const auto weight = Set(d, w); |
1281 | |
|
1282 | 0 | for (size_t y = 0; y < i0.ysize(); ++y) { |
1283 | 0 | const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); |
1284 | 0 | const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); |
1285 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y); |
1286 | |
|
1287 | 0 | for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { |
1288 | 0 | const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); |
1289 | 0 | const auto diff2 = Mul(diff, diff); |
1290 | 0 | const auto prev = Load(d, row_diff + x); |
1291 | 0 | Store(MulAdd(diff2, weight, prev), d, row_diff + x); |
1292 | 0 | } |
1293 | 0 | } |
1294 | 0 | } Unexecuted instantiation: butteraugli.cc:jxl::N_AVX3::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:jxl::N_EMU128::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Image3<float>*, unsigned long) |
1295 | | |
1296 | | // Initializes diffmap to the weighted L2 difference between i0 and i1. |
1297 | | static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w, |
1298 | 0 | Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) { |
1299 | 0 | if (w == 0) return; |
1300 | | |
1301 | 0 | const HWY_FULL(float) d; |
1302 | 0 | const auto weight = Set(d, w); |
1303 | |
|
1304 | 0 | for (size_t y = 0; y < i0.ysize(); ++y) { |
1305 | 0 | const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); |
1306 | 0 | const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); |
1307 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y); |
1308 | |
|
1309 | 0 | for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { |
1310 | 0 | const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); |
1311 | 0 | const auto diff2 = Mul(diff, diff); |
1312 | 0 | Store(Mul(diff2, weight), d, row_diff + x); |
1313 | 0 | } |
1314 | 0 | } |
1315 | 0 | } Unexecuted instantiation: butteraugli.cc:jxl::N_AVX3::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:jxl::N_EMU128::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Image3<float>*, unsigned long) |
1316 | | |
1317 | | // i0 is the original image. |
1318 | | // i1 is the deformed copy. |
1319 | | static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1, |
1320 | | float w_0lt1, |
1321 | 0 | Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) { |
1322 | 0 | if (w_0gt1 == 0 && w_0lt1 == 0) { |
1323 | 0 | return; |
1324 | 0 | } |
1325 | | |
1326 | 0 | const HWY_FULL(float) d; |
1327 | 0 | const auto vw_0gt1 = Set(d, w_0gt1 * 0.8); |
1328 | 0 | const auto vw_0lt1 = Set(d, w_0lt1 * 0.8); |
1329 | |
|
1330 | 0 | for (size_t y = 0; y < i0.ysize(); ++y) { |
1331 | 0 | const float* BUTTERAUGLI_RESTRICT row0 = i0.Row(y); |
1332 | 0 | const float* BUTTERAUGLI_RESTRICT row1 = i1.Row(y); |
1333 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y); |
1334 | |
|
1335 | 0 | for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { |
1336 | 0 | const auto val0 = Load(d, row0 + x); |
1337 | 0 | const auto val1 = Load(d, row1 + x); |
1338 | | |
1339 | | // Primary symmetric quadratic objective. |
1340 | 0 | const auto diff = Sub(val0, val1); |
1341 | 0 | auto total = MulAdd(Mul(diff, diff), vw_0gt1, Load(d, row_diff + x)); |
1342 | | |
1343 | | // Secondary half-open quadratic objectives. |
1344 | 0 | const auto fabs0 = Abs(val0); |
1345 | 0 | const auto too_small = Mul(Set(d, 0.4), fabs0); |
1346 | 0 | const auto too_big = fabs0; |
1347 | |
|
1348 | 0 | const auto if_neg = IfThenElse( |
1349 | 0 | Gt(val1, Neg(too_small)), Add(val1, too_small), |
1350 | 0 | IfThenElseZero(Lt(val1, Neg(too_big)), Sub(Neg(val1), too_big))); |
1351 | 0 | const auto if_pos = |
1352 | 0 | IfThenElse(Lt(val1, too_small), Sub(too_small, val1), |
1353 | 0 | IfThenElseZero(Gt(val1, too_big), Sub(val1, too_big))); |
1354 | 0 | const auto v = IfThenElse(Lt(val0, Zero(d)), if_neg, if_pos); |
1355 | 0 | total = MulAdd(vw_0lt1, Mul(v, v), total); |
1356 | 0 | Store(total, d, row_diff + x); |
1357 | 0 | } |
1358 | 0 | } |
1359 | 0 | } Unexecuted instantiation: butteraugli.cc:jxl::N_AVX3::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Image3<float>*, unsigned long) Unexecuted instantiation: butteraugli.cc:jxl::N_EMU128::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Image3<float>*, unsigned long) |
1360 | | |
1361 | | // A simple HDR compatible gamma function. |
1362 | | template <class DF, class V> |
1363 | 0 | V Gamma(const DF df, V v) { |
1364 | | // ln(2) constant folded in because we want std::log but have FastLog2f. |
1365 | 0 | const auto kRetMul = Set(df, 19.245013259874995f * 0.693147180559945f); |
1366 | 0 | const auto kRetAdd = Set(df, -23.16046239805755); |
1367 | | // This should happen rarely, but may lead to a NaN in log, which is |
1368 | | // undesirable. Since negative photons don't exist we solve the NaNs by |
1369 | | // clamping here. |
1370 | 0 | v = ZeroIfNegative(v); |
1371 | |
|
1372 | 0 | const auto biased = Add(v, Set(df, 9.9710635769299145)); |
1373 | 0 | const auto log = FastLog2f(df, biased); |
1374 | | // We could fold this into a custom Log2 polynomial, but there would be |
1375 | | // relatively little gain. |
1376 | 0 | return MulAdd(kRetMul, log, kRetAdd); |
1377 | 0 | } Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::Gamma<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>) Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::Gamma<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>) Unexecuted instantiation: hwy::N_AVX3::Vec512<float> jxl::N_AVX3::Gamma<hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float>) Unexecuted instantiation: hwy::N_EMU128::Vec128<float, 4ul> jxl::N_EMU128::Gamma<hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul>) |
1378 | | |
1379 | | template <bool Clamp, class DF, class V> |
1380 | | BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1, |
1381 | | const V& in2, V* JXL_RESTRICT out0, |
1382 | | V* JXL_RESTRICT out1, |
1383 | 0 | V* JXL_RESTRICT out2) { |
1384 | | // https://en.wikipedia.org/wiki/Photopsin absorbance modeling. |
1385 | 0 | static const double mixi0 = 0.29956550340058319; |
1386 | 0 | static const double mixi1 = 0.63373087833825936; |
1387 | 0 | static const double mixi2 = 0.077705617820981968; |
1388 | 0 | static const double mixi3 = 1.7557483643287353; |
1389 | 0 | static const double mixi4 = 0.22158691104574774; |
1390 | 0 | static const double mixi5 = 0.69391388044116142; |
1391 | 0 | static const double mixi6 = 0.0987313588422; |
1392 | 0 | static const double mixi7 = 1.7557483643287353; |
1393 | 0 | static const double mixi8 = 0.02; |
1394 | 0 | static const double mixi9 = 0.02; |
1395 | 0 | static const double mixi10 = 0.20480129041026129; |
1396 | 0 | static const double mixi11 = 12.226454707163354; |
1397 | |
|
1398 | 0 | const V mix0 = Set(df, mixi0); |
1399 | 0 | const V mix1 = Set(df, mixi1); |
1400 | 0 | const V mix2 = Set(df, mixi2); |
1401 | 0 | const V mix3 = Set(df, mixi3); |
1402 | 0 | const V mix4 = Set(df, mixi4); |
1403 | 0 | const V mix5 = Set(df, mixi5); |
1404 | 0 | const V mix6 = Set(df, mixi6); |
1405 | 0 | const V mix7 = Set(df, mixi7); |
1406 | 0 | const V mix8 = Set(df, mixi8); |
1407 | 0 | const V mix9 = Set(df, mixi9); |
1408 | 0 | const V mix10 = Set(df, mixi10); |
1409 | 0 | const V mix11 = Set(df, mixi11); |
1410 | |
|
1411 | 0 | *out0 = MulAdd(mix0, in0, MulAdd(mix1, in1, MulAdd(mix2, in2, mix3))); |
1412 | 0 | *out1 = MulAdd(mix4, in0, MulAdd(mix5, in1, MulAdd(mix6, in2, mix7))); |
1413 | 0 | *out2 = MulAdd(mix8, in0, MulAdd(mix9, in1, MulAdd(mix10, in2, mix11))); |
1414 | |
|
1415 | 0 | if (Clamp) { |
1416 | 0 | *out0 = Max(*out0, mix3); |
1417 | 0 | *out1 = Max(*out1, mix7); |
1418 | 0 | *out2 = Max(*out2, mix11); |
1419 | 0 | } |
1420 | 0 | } Unexecuted instantiation: void jxl::N_SSE4::OpsinAbsorbance<true, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*) Unexecuted instantiation: void jxl::N_SSE4::OpsinAbsorbance<false, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*) Unexecuted instantiation: void jxl::N_AVX2::OpsinAbsorbance<true, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*) Unexecuted instantiation: void jxl::N_AVX2::OpsinAbsorbance<false, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*) Unexecuted instantiation: void jxl::N_AVX3::OpsinAbsorbance<true, hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> const&, hwy::N_AVX3::Vec512<float> const&, hwy::N_AVX3::Vec512<float> const&, hwy::N_AVX3::Vec512<float>*, hwy::N_AVX3::Vec512<float>*, hwy::N_AVX3::Vec512<float>*) Unexecuted instantiation: void jxl::N_AVX3::OpsinAbsorbance<false, hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> >(hwy::N_AVX3::Simd<float, 16ul, 0>, hwy::N_AVX3::Vec512<float> const&, hwy::N_AVX3::Vec512<float> const&, hwy::N_AVX3::Vec512<float> const&, hwy::N_AVX3::Vec512<float>*, hwy::N_AVX3::Vec512<float>*, hwy::N_AVX3::Vec512<float>*) Unexecuted instantiation: void jxl::N_EMU128::OpsinAbsorbance<true, hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> const&, hwy::N_EMU128::Vec128<float, 4ul> const&, hwy::N_EMU128::Vec128<float, 4ul> const&, hwy::N_EMU128::Vec128<float, 4ul>*, hwy::N_EMU128::Vec128<float, 4ul>*, hwy::N_EMU128::Vec128<float, 4ul>*) Unexecuted instantiation: void jxl::N_EMU128::OpsinAbsorbance<false, hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> >(hwy::N_EMU128::Simd<float, 4ul, 0>, hwy::N_EMU128::Vec128<float, 4ul> const&, hwy::N_EMU128::Vec128<float, 4ul> const&, hwy::N_EMU128::Vec128<float, 4ul> const&, hwy::N_EMU128::Vec128<float, 4ul>*, hwy::N_EMU128::Vec128<float, 4ul>*, hwy::N_EMU128::Vec128<float, 4ul>*) |
1421 | | |
1422 | | // `blurred` is a temporary image used inside this function and not returned. |
1423 | | Image3F OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params, |
1424 | 0 | Image3F* blurred, BlurTemp* blur_temp) { |
1425 | 0 | PROFILER_FUNC; |
1426 | 0 | Image3F xyb(rgb.xsize(), rgb.ysize()); |
1427 | 0 | const double kSigma = 1.2; |
1428 | 0 | Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0)); |
1429 | 0 | Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1)); |
1430 | 0 | Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2)); |
1431 | 0 | const HWY_FULL(float) df; |
1432 | 0 | const auto intensity_target_multiplier = Set(df, params.intensity_target); |
1433 | 0 | for (size_t y = 0; y < rgb.ysize(); ++y) { |
1434 | 0 | const float* BUTTERAUGLI_RESTRICT row_r = rgb.ConstPlaneRow(0, y); |
1435 | 0 | const float* BUTTERAUGLI_RESTRICT row_g = rgb.ConstPlaneRow(1, y); |
1436 | 0 | const float* BUTTERAUGLI_RESTRICT row_b = rgb.ConstPlaneRow(2, y); |
1437 | 0 | const float* BUTTERAUGLI_RESTRICT row_blurred_r = |
1438 | 0 | blurred->ConstPlaneRow(0, y); |
1439 | 0 | const float* BUTTERAUGLI_RESTRICT row_blurred_g = |
1440 | 0 | blurred->ConstPlaneRow(1, y); |
1441 | 0 | const float* BUTTERAUGLI_RESTRICT row_blurred_b = |
1442 | 0 | blurred->ConstPlaneRow(2, y); |
1443 | 0 | float* BUTTERAUGLI_RESTRICT row_out_x = xyb.PlaneRow(0, y); |
1444 | 0 | float* BUTTERAUGLI_RESTRICT row_out_y = xyb.PlaneRow(1, y); |
1445 | 0 | float* BUTTERAUGLI_RESTRICT row_out_b = xyb.PlaneRow(2, y); |
1446 | 0 | const auto min = Set(df, 1e-4f); |
1447 | 0 | for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) { |
1448 | 0 | auto sensitivity0 = Undefined(df); |
1449 | 0 | auto sensitivity1 = Undefined(df); |
1450 | 0 | auto sensitivity2 = Undefined(df); |
1451 | 0 | { |
1452 | | // Calculate sensitivity based on the smoothed image gamma derivative. |
1453 | 0 | auto pre_mixed0 = Undefined(df); |
1454 | 0 | auto pre_mixed1 = Undefined(df); |
1455 | 0 | auto pre_mixed2 = Undefined(df); |
1456 | 0 | OpsinAbsorbance<true>( |
1457 | 0 | df, Mul(Load(df, row_blurred_r + x), intensity_target_multiplier), |
1458 | 0 | Mul(Load(df, row_blurred_g + x), intensity_target_multiplier), |
1459 | 0 | Mul(Load(df, row_blurred_b + x), intensity_target_multiplier), |
1460 | 0 | &pre_mixed0, &pre_mixed1, &pre_mixed2); |
1461 | 0 | pre_mixed0 = Max(pre_mixed0, min); |
1462 | 0 | pre_mixed1 = Max(pre_mixed1, min); |
1463 | 0 | pre_mixed2 = Max(pre_mixed2, min); |
1464 | 0 | sensitivity0 = Div(Gamma(df, pre_mixed0), pre_mixed0); |
1465 | 0 | sensitivity1 = Div(Gamma(df, pre_mixed1), pre_mixed1); |
1466 | 0 | sensitivity2 = Div(Gamma(df, pre_mixed2), pre_mixed2); |
1467 | 0 | sensitivity0 = Max(sensitivity0, min); |
1468 | 0 | sensitivity1 = Max(sensitivity1, min); |
1469 | 0 | sensitivity2 = Max(sensitivity2, min); |
1470 | 0 | } |
1471 | 0 | auto cur_mixed0 = Undefined(df); |
1472 | 0 | auto cur_mixed1 = Undefined(df); |
1473 | 0 | auto cur_mixed2 = Undefined(df); |
1474 | 0 | OpsinAbsorbance<false>( |
1475 | 0 | df, Mul(Load(df, row_r + x), intensity_target_multiplier), |
1476 | 0 | Mul(Load(df, row_g + x), intensity_target_multiplier), |
1477 | 0 | Mul(Load(df, row_b + x), intensity_target_multiplier), &cur_mixed0, |
1478 | 0 | &cur_mixed1, &cur_mixed2); |
1479 | 0 | cur_mixed0 = Mul(cur_mixed0, sensitivity0); |
1480 | 0 | cur_mixed1 = Mul(cur_mixed1, sensitivity1); |
1481 | 0 | cur_mixed2 = Mul(cur_mixed2, sensitivity2); |
1482 | | // This is a kludge. The negative values should be zeroed away before |
1483 | | // blurring. Ideally there would be no negative values in the first place. |
1484 | 0 | const auto min01 = Set(df, 1.7557483643287353f); |
1485 | 0 | const auto min2 = Set(df, 12.226454707163354f); |
1486 | 0 | cur_mixed0 = Max(cur_mixed0, min01); |
1487 | 0 | cur_mixed1 = Max(cur_mixed1, min01); |
1488 | 0 | cur_mixed2 = Max(cur_mixed2, min2); |
1489 | |
|
1490 | 0 | Store(Sub(cur_mixed0, cur_mixed1), df, row_out_x + x); |
1491 | 0 | Store(Add(cur_mixed0, cur_mixed1), df, row_out_y + x); |
1492 | 0 | Store(cur_mixed2, df, row_out_b + x); |
1493 | 0 | } |
1494 | 0 | } |
1495 | 0 | return xyb; |
1496 | 0 | } Unexecuted instantiation: jxl::N_SSE4::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*) Unexecuted instantiation: jxl::N_AVX2::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*) Unexecuted instantiation: jxl::N_AVX3::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*) Unexecuted instantiation: jxl::N_EMU128::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*) |
1497 | | |
1498 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
1499 | | } // namespace HWY_NAMESPACE |
1500 | | } // namespace jxl |
1501 | | HWY_AFTER_NAMESPACE(); |
1502 | | |
1503 | | #if HWY_ONCE |
1504 | | namespace jxl { |
1505 | | |
1506 | | HWY_EXPORT(SeparateFrequencies); // Local function. |
1507 | | HWY_EXPORT(MaskPsychoImage); // Local function. |
1508 | | HWY_EXPORT(L2DiffAsymmetric); // Local function. |
1509 | | HWY_EXPORT(L2Diff); // Local function. |
1510 | | HWY_EXPORT(SetL2Diff); // Local function. |
1511 | | HWY_EXPORT(CombineChannelsToDiffmap); // Local function. |
1512 | | HWY_EXPORT(MaltaDiffMap); // Local function. |
1513 | | HWY_EXPORT(MaltaDiffMapLF); // Local function. |
1514 | | HWY_EXPORT(OpsinDynamicsImage); // Local function. |
1515 | | |
1516 | | #if BUTTERAUGLI_ENABLE_CHECKS |
1517 | | |
1518 | | static inline bool IsNan(const float x) { |
1519 | | uint32_t bits; |
1520 | | memcpy(&bits, &x, sizeof(bits)); |
1521 | | const uint32_t bitmask_exp = 0x7F800000; |
1522 | | return (bits & bitmask_exp) == bitmask_exp && (bits & 0x7FFFFF); |
1523 | | } |
1524 | | |
1525 | | static inline bool IsNan(const double x) { |
1526 | | uint64_t bits; |
1527 | | memcpy(&bits, &x, sizeof(bits)); |
1528 | | return (0x7ff0000000000001ULL <= bits && bits <= 0x7fffffffffffffffULL) || |
1529 | | (0xfff0000000000001ULL <= bits && bits <= 0xffffffffffffffffULL); |
1530 | | } |
1531 | | |
1532 | | static inline void CheckImage(const ImageF& image, const char* name) { |
1533 | | PROFILER_FUNC; |
1534 | | for (size_t y = 0; y < image.ysize(); ++y) { |
1535 | | const float* BUTTERAUGLI_RESTRICT row = image.Row(y); |
1536 | | for (size_t x = 0; x < image.xsize(); ++x) { |
1537 | | if (IsNan(row[x])) { |
1538 | | printf("NAN: Image %s @ %" PRIuS ",%" PRIuS " (of %" PRIuS ",%" PRIuS |
1539 | | ")\n", |
1540 | | name, x, y, image.xsize(), image.ysize()); |
1541 | | exit(1); |
1542 | | } |
1543 | | } |
1544 | | } |
1545 | | } |
1546 | | |
1547 | | #define CHECK_NAN(x, str) \ |
1548 | | do { \ |
1549 | | if (IsNan(x)) { \ |
1550 | | printf("%d: %s\n", __LINE__, str); \ |
1551 | | abort(); \ |
1552 | | } \ |
1553 | | } while (0) |
1554 | | |
1555 | | #define CHECK_IMAGE(image, name) CheckImage(image, name) |
1556 | | |
1557 | | #else // BUTTERAUGLI_ENABLE_CHECKS |
1558 | | |
1559 | | #define CHECK_NAN(x, str) |
1560 | | #define CHECK_IMAGE(image, name) |
1561 | | |
1562 | | #endif // BUTTERAUGLI_ENABLE_CHECKS |
1563 | | |
1564 | | // Calculate a 2x2 subsampled image for purposes of recursive butteraugli at |
1565 | | // multiresolution. |
1566 | 0 | static Image3F SubSample2x(const Image3F& in) { |
1567 | 0 | size_t xs = (in.xsize() + 1) / 2; |
1568 | 0 | size_t ys = (in.ysize() + 1) / 2; |
1569 | 0 | Image3F retval(xs, ys); |
1570 | 0 | for (size_t c = 0; c < 3; ++c) { |
1571 | 0 | for (size_t y = 0; y < ys; ++y) { |
1572 | 0 | for (size_t x = 0; x < xs; ++x) { |
1573 | 0 | retval.PlaneRow(c, y)[x] = 0; |
1574 | 0 | } |
1575 | 0 | } |
1576 | 0 | } |
1577 | 0 | for (size_t c = 0; c < 3; ++c) { |
1578 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
1579 | 0 | for (size_t x = 0; x < in.xsize(); ++x) { |
1580 | 0 | retval.PlaneRow(c, y / 2)[x / 2] += 0.25f * in.PlaneRow(c, y)[x]; |
1581 | 0 | } |
1582 | 0 | } |
1583 | 0 | if ((in.xsize() & 1) != 0) { |
1584 | 0 | for (size_t y = 0; y < retval.ysize(); ++y) { |
1585 | 0 | size_t last_column = retval.xsize() - 1; |
1586 | 0 | retval.PlaneRow(c, y)[last_column] *= 2.0f; |
1587 | 0 | } |
1588 | 0 | } |
1589 | 0 | if ((in.ysize() & 1) != 0) { |
1590 | 0 | for (size_t x = 0; x < retval.xsize(); ++x) { |
1591 | 0 | size_t last_row = retval.ysize() - 1; |
1592 | 0 | retval.PlaneRow(c, last_row)[x] *= 2.0f; |
1593 | 0 | } |
1594 | 0 | } |
1595 | 0 | } |
1596 | 0 | return retval; |
1597 | 0 | } |
1598 | | |
1599 | | // Supersample src by 2x and add it to dest. |
1600 | 0 | static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) { |
1601 | 0 | for (size_t y = 0; y < dest.ysize(); ++y) { |
1602 | 0 | for (size_t x = 0; x < dest.xsize(); ++x) { |
1603 | | // There will be less errors from the more averaged images. |
1604 | | // We take it into account to some extent using a scaler. |
1605 | 0 | static const double kHeuristicMixingValue = 0.3; |
1606 | 0 | dest.Row(y)[x] *= 1.0 - kHeuristicMixingValue * w; |
1607 | 0 | dest.Row(y)[x] += w * src.Row(y / 2)[x / 2]; |
1608 | 0 | } |
1609 | 0 | } |
1610 | 0 | } |
1611 | | |
1612 | 0 | Image3F* ButteraugliComparator::Temp() const { |
1613 | 0 | bool was_in_use = temp_in_use_.test_and_set(std::memory_order_acq_rel); |
1614 | 0 | JXL_ASSERT(!was_in_use); |
1615 | 0 | (void)was_in_use; |
1616 | 0 | return &temp_; |
1617 | 0 | } |
1618 | | |
1619 | 0 | void ButteraugliComparator::ReleaseTemp() const { temp_in_use_.clear(); } |
1620 | | |
1621 | | ButteraugliComparator::ButteraugliComparator(const Image3F& rgb0, |
1622 | | const ButteraugliParams& params) |
1623 | | : xsize_(rgb0.xsize()), |
1624 | | ysize_(rgb0.ysize()), |
1625 | | params_(params), |
1626 | 0 | temp_(xsize_, ysize_) { |
1627 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1628 | 0 | return; |
1629 | 0 | } |
1630 | | |
1631 | 0 | Image3F xyb0 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(rgb0, params, Temp(), |
1632 | 0 | &blur_temp_); |
1633 | 0 | ReleaseTemp(); |
1634 | 0 | HWY_DYNAMIC_DISPATCH(SeparateFrequencies) |
1635 | 0 | (xsize_, ysize_, params_, &blur_temp_, xyb0, pi0_); |
1636 | | |
1637 | | // Awful recursive construction of samples of different resolution. |
1638 | | // This is an after-thought and possibly somewhat parallel in |
1639 | | // functionality with the PsychoImage multi-resolution approach. |
1640 | 0 | sub_.reset(new ButteraugliComparator(SubSample2x(rgb0), params)); |
1641 | 0 | } |
1642 | | |
1643 | 0 | void ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const { |
1644 | 0 | HWY_DYNAMIC_DISPATCH(MaskPsychoImage) |
1645 | 0 | (pi0_, pi0_, xsize_, ysize_, params_, Temp(), &blur_temp_, mask, nullptr); |
1646 | 0 | ReleaseTemp(); |
1647 | 0 | } |
1648 | | |
1649 | 0 | void ButteraugliComparator::Diffmap(const Image3F& rgb1, ImageF& result) const { |
1650 | 0 | PROFILER_FUNC; |
1651 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1652 | 0 | ZeroFillImage(&result); |
1653 | 0 | return; |
1654 | 0 | } |
1655 | 0 | const Image3F xyb1 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( |
1656 | 0 | rgb1, params_, Temp(), &blur_temp_); |
1657 | 0 | ReleaseTemp(); |
1658 | 0 | DiffmapOpsinDynamicsImage(xyb1, result); |
1659 | 0 | if (sub_) { |
1660 | 0 | if (sub_->xsize_ < 8 || sub_->ysize_ < 8) { |
1661 | 0 | return; |
1662 | 0 | } |
1663 | 0 | const Image3F sub_xyb = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( |
1664 | 0 | SubSample2x(rgb1), params_, sub_->Temp(), &sub_->blur_temp_); |
1665 | 0 | sub_->ReleaseTemp(); |
1666 | 0 | ImageF subresult; |
1667 | 0 | sub_->DiffmapOpsinDynamicsImage(sub_xyb, subresult); |
1668 | 0 | AddSupersampled2x(subresult, 0.5, result); |
1669 | 0 | } |
1670 | 0 | } |
1671 | | |
1672 | | void ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1, |
1673 | 0 | ImageF& result) const { |
1674 | 0 | PROFILER_FUNC; |
1675 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1676 | 0 | ZeroFillImage(&result); |
1677 | 0 | return; |
1678 | 0 | } |
1679 | 0 | PsychoImage pi1; |
1680 | 0 | HWY_DYNAMIC_DISPATCH(SeparateFrequencies) |
1681 | 0 | (xsize_, ysize_, params_, &blur_temp_, xyb1, pi1); |
1682 | 0 | result = ImageF(xsize_, ysize_); |
1683 | 0 | DiffmapPsychoImage(pi1, result); |
1684 | 0 | } |
1685 | | |
1686 | | namespace { |
1687 | | |
1688 | | void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, |
1689 | | const double w_0lt1, const double norm1, |
1690 | | ImageF* HWY_RESTRICT diffs, |
1691 | 0 | Image3F* HWY_RESTRICT block_diff_ac, size_t c) { |
1692 | 0 | PROFILER_FUNC; |
1693 | 0 | const double len = 3.75; |
1694 | 0 | static const double mulli = 0.39905817637; |
1695 | 0 | HWY_DYNAMIC_DISPATCH(MaltaDiffMap) |
1696 | 0 | (lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, diffs, block_diff_ac, c); |
1697 | 0 | } |
1698 | | |
1699 | | void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, |
1700 | | const double w_0lt1, const double norm1, |
1701 | | ImageF* HWY_RESTRICT diffs, |
1702 | 0 | Image3F* HWY_RESTRICT block_diff_ac, size_t c) { |
1703 | 0 | PROFILER_FUNC; |
1704 | 0 | const double len = 3.75; |
1705 | 0 | static const double mulli = 0.611612573796; |
1706 | 0 | HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF) |
1707 | 0 | (lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, diffs, block_diff_ac, c); |
1708 | 0 | } |
1709 | | |
1710 | | } // namespace |
1711 | | |
1712 | | void ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1, |
1713 | 0 | ImageF& diffmap) const { |
1714 | 0 | PROFILER_FUNC; |
1715 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1716 | 0 | ZeroFillImage(&diffmap); |
1717 | 0 | return; |
1718 | 0 | } |
1719 | | |
1720 | 0 | const float hf_asymmetry_ = params_.hf_asymmetry; |
1721 | 0 | const float xmul_ = params_.xmul; |
1722 | |
|
1723 | 0 | ImageF diffs(xsize_, ysize_); |
1724 | 0 | Image3F block_diff_ac(xsize_, ysize_); |
1725 | 0 | ZeroFillImage(&block_diff_ac); |
1726 | 0 | static const double wUhfMalta = 1.10039032555; |
1727 | 0 | static const double norm1Uhf = 71.7800275169; |
1728 | 0 | MaltaDiffMap(pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_, |
1729 | 0 | wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1); |
1730 | |
|
1731 | 0 | static const double wUhfMaltaX = 173.5; |
1732 | 0 | static const double norm1UhfX = 5.0; |
1733 | 0 | MaltaDiffMap(pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_, |
1734 | 0 | wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac, |
1735 | 0 | 0); |
1736 | |
|
1737 | 0 | static const double wHfMalta = 18.7237414387; |
1738 | 0 | static const double norm1Hf = 4498534.45232; |
1739 | 0 | MaltaDiffMapLF(pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_), |
1740 | 0 | wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs, |
1741 | 0 | &block_diff_ac, 1); |
1742 | |
|
1743 | 0 | static const double wHfMaltaX = 6923.99476109; |
1744 | 0 | static const double norm1HfX = 8051.15833247; |
1745 | 0 | MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], wHfMaltaX * std::sqrt(hf_asymmetry_), |
1746 | 0 | wHfMaltaX / std::sqrt(hf_asymmetry_), norm1HfX, &diffs, |
1747 | 0 | &block_diff_ac, 0); |
1748 | |
|
1749 | 0 | static const double wMfMalta = 37.0819870399; |
1750 | 0 | static const double norm1Mf = 130262059.556; |
1751 | 0 | MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), wMfMalta, wMfMalta, norm1Mf, |
1752 | 0 | &diffs, &block_diff_ac, 1); |
1753 | |
|
1754 | 0 | static const double wMfMaltaX = 8246.75321353; |
1755 | 0 | static const double norm1MfX = 1009002.70582; |
1756 | 0 | MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), wMfMaltaX, wMfMaltaX, |
1757 | 0 | norm1MfX, &diffs, &block_diff_ac, 0); |
1758 | |
|
1759 | 0 | static const double wmul[9] = { |
1760 | 0 | 400.0, 1.50815703118, 0, |
1761 | 0 | 2150.0, 10.6195433239, 16.2176043152, |
1762 | 0 | 29.2353797994, 0.844626970982, 0.703646627719, |
1763 | 0 | }; |
1764 | 0 | Image3F block_diff_dc(xsize_, ysize_); |
1765 | 0 | for (size_t c = 0; c < 3; ++c) { |
1766 | 0 | if (c < 2) { // No blue channel error accumulated at HF. |
1767 | 0 | HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric) |
1768 | 0 | (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_, |
1769 | 0 | &block_diff_ac, c); |
1770 | 0 | } |
1771 | 0 | HWY_DYNAMIC_DISPATCH(L2Diff) |
1772 | 0 | (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac, c); |
1773 | 0 | HWY_DYNAMIC_DISPATCH(SetL2Diff) |
1774 | 0 | (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc, c); |
1775 | 0 | } |
1776 | |
|
1777 | 0 | ImageF mask; |
1778 | 0 | HWY_DYNAMIC_DISPATCH(MaskPsychoImage) |
1779 | 0 | (pi0_, pi1, xsize_, ysize_, params_, Temp(), &blur_temp_, &mask, |
1780 | 0 | &block_diff_ac.Plane(1)); |
1781 | 0 | ReleaseTemp(); |
1782 | |
|
1783 | 0 | HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap) |
1784 | 0 | (mask, block_diff_dc, block_diff_ac, xmul_, &diffmap); |
1785 | 0 | } |
1786 | | |
1787 | | double ButteraugliScoreFromDiffmap(const ImageF& diffmap, |
1788 | 0 | const ButteraugliParams* params) { |
1789 | 0 | PROFILER_FUNC; |
1790 | 0 | float retval = 0.0f; |
1791 | 0 | for (size_t y = 0; y < diffmap.ysize(); ++y) { |
1792 | 0 | const float* BUTTERAUGLI_RESTRICT row = diffmap.ConstRow(y); |
1793 | 0 | for (size_t x = 0; x < diffmap.xsize(); ++x) { |
1794 | 0 | retval = std::max(retval, row[x]); |
1795 | 0 | } |
1796 | 0 | } |
1797 | 0 | return retval; |
1798 | 0 | } |
1799 | | |
1800 | | bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, |
1801 | 0 | double hf_asymmetry, double xmul, ImageF& diffmap) { |
1802 | 0 | ButteraugliParams params; |
1803 | 0 | params.hf_asymmetry = hf_asymmetry; |
1804 | 0 | params.xmul = xmul; |
1805 | 0 | return ButteraugliDiffmap(rgb0, rgb1, params, diffmap); |
1806 | 0 | } |
1807 | | |
1808 | | bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, |
1809 | 0 | const ButteraugliParams& params, ImageF& diffmap) { |
1810 | 0 | PROFILER_FUNC; |
1811 | 0 | const size_t xsize = rgb0.xsize(); |
1812 | 0 | const size_t ysize = rgb0.ysize(); |
1813 | 0 | if (xsize < 1 || ysize < 1) { |
1814 | 0 | return JXL_FAILURE("Zero-sized image"); |
1815 | 0 | } |
1816 | 0 | if (!SameSize(rgb0, rgb1)) { |
1817 | 0 | return JXL_FAILURE("Size mismatch"); |
1818 | 0 | } |
1819 | 0 | static const int kMax = 8; |
1820 | 0 | if (xsize < kMax || ysize < kMax) { |
1821 | | // Butteraugli values for small (where xsize or ysize is smaller |
1822 | | // than 8 pixels) images are non-sensical, but most likely it is |
1823 | | // less disruptive to try to compute something than just give up. |
1824 | | // Temporarily extend the borders of the image to fit 8 x 8 size. |
1825 | 0 | size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0; |
1826 | 0 | size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0; |
1827 | 0 | size_t xscaled = std::max<size_t>(kMax, xsize); |
1828 | 0 | size_t yscaled = std::max<size_t>(kMax, ysize); |
1829 | 0 | Image3F scaled0(xscaled, yscaled); |
1830 | 0 | Image3F scaled1(xscaled, yscaled); |
1831 | 0 | for (int i = 0; i < 3; ++i) { |
1832 | 0 | for (size_t y = 0; y < yscaled; ++y) { |
1833 | 0 | for (size_t x = 0; x < xscaled; ++x) { |
1834 | 0 | size_t x2 = |
1835 | 0 | std::min<size_t>(xsize - 1, x > xborder ? x - xborder : 0); |
1836 | 0 | size_t y2 = |
1837 | 0 | std::min<size_t>(ysize - 1, y > yborder ? y - yborder : 0); |
1838 | 0 | scaled0.PlaneRow(i, y)[x] = rgb0.PlaneRow(i, y2)[x2]; |
1839 | 0 | scaled1.PlaneRow(i, y)[x] = rgb1.PlaneRow(i, y2)[x2]; |
1840 | 0 | } |
1841 | 0 | } |
1842 | 0 | } |
1843 | 0 | ImageF diffmap_scaled; |
1844 | 0 | const bool ok = |
1845 | 0 | ButteraugliDiffmap(scaled0, scaled1, params, diffmap_scaled); |
1846 | 0 | diffmap = ImageF(xsize, ysize); |
1847 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1848 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1849 | 0 | diffmap.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder]; |
1850 | 0 | } |
1851 | 0 | } |
1852 | 0 | return ok; |
1853 | 0 | } |
1854 | 0 | ButteraugliComparator butteraugli(rgb0, params); |
1855 | 0 | butteraugli.Diffmap(rgb1, diffmap); |
1856 | 0 | return true; |
1857 | 0 | } |
1858 | | |
1859 | | bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, |
1860 | | float hf_asymmetry, float xmul, ImageF& diffmap, |
1861 | 0 | double& diffvalue) { |
1862 | 0 | ButteraugliParams params; |
1863 | 0 | params.hf_asymmetry = hf_asymmetry; |
1864 | 0 | params.xmul = xmul; |
1865 | 0 | return ButteraugliInterface(rgb0, rgb1, params, diffmap, diffvalue); |
1866 | 0 | } |
1867 | | |
1868 | | bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, |
1869 | | const ButteraugliParams& params, ImageF& diffmap, |
1870 | 0 | double& diffvalue) { |
1871 | | #if PROFILER_ENABLED |
1872 | | auto trace_start = std::chrono::steady_clock::now(); |
1873 | | #endif |
1874 | 0 | if (!ButteraugliDiffmap(rgb0, rgb1, params, diffmap)) { |
1875 | 0 | return false; |
1876 | 0 | } |
1877 | | #if PROFILER_ENABLED |
1878 | | auto trace_end = std::chrono::steady_clock::now(); |
1879 | | std::chrono::duration<double> elapsed = trace_end - trace_start; |
1880 | | const size_t mp = rgb0.xsize() * rgb0.ysize(); |
1881 | | printf("diff MP/s %f\n", mp / elapsed.count() * 1E-6); |
1882 | | #endif |
1883 | 0 | diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); |
1884 | 0 | return true; |
1885 | 0 | } |
1886 | | |
1887 | 0 | double ButteraugliFuzzyClass(double score) { |
1888 | 0 | static const double fuzzy_width_up = 4.8; |
1889 | 0 | static const double fuzzy_width_down = 4.8; |
1890 | 0 | static const double m0 = 2.0; |
1891 | 0 | static const double scaler = 0.7777; |
1892 | 0 | double val; |
1893 | 0 | if (score < 1.0) { |
1894 | | // val in [scaler .. 2.0] |
1895 | 0 | val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_down)); |
1896 | 0 | val -= 1.0; // from [1 .. 2] to [0 .. 1] |
1897 | 0 | val *= 2.0 - scaler; // from [0 .. 1] to [0 .. 2.0 - scaler] |
1898 | 0 | val += scaler; // from [0 .. 2.0 - scaler] to [scaler .. 2.0] |
1899 | 0 | } else { |
1900 | | // val in [0 .. scaler] |
1901 | 0 | val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_up)); |
1902 | 0 | val *= scaler; |
1903 | 0 | } |
1904 | 0 | return val; |
1905 | 0 | } |
1906 | | |
1907 | | // #define PRINT_OUT_NORMALIZATION |
1908 | | |
1909 | 0 | double ButteraugliFuzzyInverse(double seek) { |
1910 | 0 | double pos = 0; |
1911 | | // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter) |
1912 | 0 | for (double range = 1.0; range >= 1e-10; range *= 0.5) { |
1913 | 0 | double cur = ButteraugliFuzzyClass(pos); |
1914 | 0 | if (cur < seek) { |
1915 | 0 | pos -= range; |
1916 | 0 | } else { |
1917 | 0 | pos += range; |
1918 | 0 | } |
1919 | 0 | } |
1920 | | #ifdef PRINT_OUT_NORMALIZATION |
1921 | | if (seek == 1.0) { |
1922 | | fprintf(stderr, "Fuzzy inverse %g\n", pos); |
1923 | | } |
1924 | | #endif |
1925 | 0 | return pos; |
1926 | 0 | } |
1927 | | |
1928 | | #ifdef PRINT_OUT_NORMALIZATION |
1929 | | static double print_out_normalization = ButteraugliFuzzyInverse(1.0); |
1930 | | #endif |
1931 | | |
1932 | | namespace { |
1933 | | |
1934 | | void ScoreToRgb(double score, double good_threshold, double bad_threshold, |
1935 | 0 | float rgb[3]) { |
1936 | 0 | double heatmap[12][3] = { |
1937 | 0 | {0, 0, 0}, {0, 0, 1}, |
1938 | 0 | {0, 1, 1}, {0, 1, 0}, // Good level |
1939 | 0 | {1, 1, 0}, {1, 0, 0}, // Bad level |
1940 | 0 | {1, 0, 1}, {0.5, 0.5, 1.0}, |
1941 | 0 | {1.0, 0.5, 0.5}, // Pastel colors for the very bad quality range. |
1942 | 0 | {1.0, 1.0, 0.5}, {1, 1, 1}, |
1943 | 0 | {1, 1, 1}, // Last color repeated to have a solid range of white. |
1944 | 0 | }; |
1945 | 0 | if (score < good_threshold) { |
1946 | 0 | score = (score / good_threshold) * 0.3; |
1947 | 0 | } else if (score < bad_threshold) { |
1948 | 0 | score = 0.3 + |
1949 | 0 | (score - good_threshold) / (bad_threshold - good_threshold) * 0.15; |
1950 | 0 | } else { |
1951 | 0 | score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5; |
1952 | 0 | } |
1953 | 0 | static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]); |
1954 | 0 | score = std::min<double>(std::max<double>(score * (kTableSize - 1), 0.0), |
1955 | 0 | kTableSize - 2); |
1956 | 0 | int ix = static_cast<int>(score); |
1957 | 0 | ix = std::min(std::max(0, ix), kTableSize - 2); // Handle NaN |
1958 | 0 | double mix = score - ix; |
1959 | 0 | for (int i = 0; i < 3; ++i) { |
1960 | 0 | double v = mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i]; |
1961 | 0 | rgb[i] = pow(v, 0.5); |
1962 | 0 | } |
1963 | 0 | } |
1964 | | |
1965 | | } // namespace |
1966 | | |
1967 | | Image3F CreateHeatMapImage(const ImageF& distmap, double good_threshold, |
1968 | 0 | double bad_threshold) { |
1969 | 0 | Image3F heatmap(distmap.xsize(), distmap.ysize()); |
1970 | 0 | for (size_t y = 0; y < distmap.ysize(); ++y) { |
1971 | 0 | const float* BUTTERAUGLI_RESTRICT row_distmap = distmap.ConstRow(y); |
1972 | 0 | float* BUTTERAUGLI_RESTRICT row_h0 = heatmap.PlaneRow(0, y); |
1973 | 0 | float* BUTTERAUGLI_RESTRICT row_h1 = heatmap.PlaneRow(1, y); |
1974 | 0 | float* BUTTERAUGLI_RESTRICT row_h2 = heatmap.PlaneRow(2, y); |
1975 | 0 | for (size_t x = 0; x < distmap.xsize(); ++x) { |
1976 | 0 | const float d = row_distmap[x]; |
1977 | 0 | float rgb[3]; |
1978 | 0 | ScoreToRgb(d, good_threshold, bad_threshold, rgb); |
1979 | 0 | row_h0[x] = rgb[0]; |
1980 | 0 | row_h1[x] = rgb[1]; |
1981 | 0 | row_h2[x] = rgb[2]; |
1982 | 0 | } |
1983 | 0 | } |
1984 | 0 | return heatmap; |
1985 | 0 | } |
1986 | | |
1987 | | } // namespace jxl |
1988 | | #endif // HWY_ONCE |