/src/libjxl/lib/jxl/butteraugli/butteraugli.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | // |
6 | | // Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com) |
7 | | // |
8 | | // The physical architecture of butteraugli is based on the following naming |
9 | | // convention: |
10 | | // * Opsin - dynamics of the photosensitive chemicals in the retina |
11 | | // with their immediate electrical processing |
12 | | // * Xyb - hybrid opponent/trichromatic color space |
13 | | // x is roughly red-subtract-green. |
14 | | // y is yellow. |
15 | | // b is blue. |
16 | | // Xyb values are computed from Opsin mixing, not directly from rgb. |
17 | | // * Mask - for visual masking |
18 | | // * Hf - color modeling for spatially high-frequency features |
19 | | // * Lf - color modeling for spatially low-frequency features |
20 | | // * Diffmap - to cluster and build an image of error between the images |
21 | | // * Blur - to hold the smoothing code |
22 | | |
23 | | #include "lib/jxl/butteraugli/butteraugli.h" |
24 | | |
25 | | #include <jxl/memory_manager.h> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <cmath> |
29 | | #include <cstdint> |
30 | | #include <cstdio> |
31 | | #include <cstdlib> |
32 | | #include <cstring> |
33 | | #include <memory> |
34 | | #include <vector> |
35 | | |
36 | | #include "lib/jxl/image.h" |
37 | | |
38 | | #undef HWY_TARGET_INCLUDE |
39 | | #define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc" |
40 | | #include <hwy/foreach_target.h> |
41 | | |
42 | | #include "lib/jxl/base/fast_math-inl.h" |
43 | | #include "lib/jxl/base/rect.h" |
44 | | #include "lib/jxl/base/status.h" |
45 | | #include "lib/jxl/convolve.h" |
46 | | #include "lib/jxl/image_ops.h" |
47 | | |
48 | | #if BUTTERAUGLI_ENABLE_CHECKS |
49 | | #include "lib/jxl/base/printf_macros.h" |
50 | | #endif |
51 | | |
52 | | #ifndef JXL_BUTTERAUGLI_ONCE |
53 | | #define JXL_BUTTERAUGLI_ONCE |
54 | | |
55 | | namespace jxl { |
56 | | |
57 | | static const double wMfMalta = 37.0819870399; |
58 | | static const double norm1Mf = 130262059.556; |
59 | | static const double wMfMaltaX = 8246.75321353; |
60 | | static const double norm1MfX = 1009002.70582; |
61 | | static const double wHfMalta = 18.7237414387; |
62 | | static const double norm1Hf = 4498534.45232; |
63 | | static const double wHfMaltaX = 6923.99476109; |
64 | | static const double norm1HfX = 8051.15833247; |
65 | | static const double wUhfMalta = 1.10039032555; |
66 | | static const double norm1Uhf = 71.7800275169; |
67 | | static const double wUhfMaltaX = 173.5; |
68 | | static const double norm1UhfX = 5.0; |
69 | | static const double wmul[9] = { |
70 | | 400.0, 1.50815703118, 0, |
71 | | 2150.0, 10.6195433239, 16.2176043152, |
72 | | 29.2353797994, 0.844626970982, 0.703646627719, |
73 | | }; |
74 | | |
75 | 0 | std::vector<float> ComputeKernel(float sigma) { |
76 | 0 | const float m = 2.25; // Accuracy increases when m is increased. |
77 | 0 | const double scaler = -1.0 / (2.0 * sigma * sigma); |
78 | 0 | const int diff = std::max<int>(1, m * std::fabs(sigma)); |
79 | 0 | std::vector<float> kernel(2 * diff + 1); |
80 | 0 | for (int i = -diff; i <= diff; ++i) { |
81 | 0 | kernel[i + diff] = std::exp(scaler * i * i); |
82 | 0 | } |
83 | 0 | return kernel; |
84 | 0 | } |
85 | | |
86 | | void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel, |
87 | 0 | const size_t x, float* BUTTERAUGLI_RESTRICT row_out) { |
88 | 0 | const size_t offset = kernel.size() / 2; |
89 | 0 | int minx = x < offset ? 0 : x - offset; |
90 | 0 | int maxx = std::min<int>(in.xsize() - 1, x + offset); |
91 | 0 | float weight = 0.0f; |
92 | 0 | for (int j = minx; j <= maxx; ++j) { |
93 | 0 | weight += kernel[j - x + offset]; |
94 | 0 | } |
95 | 0 | float scale = 1.0f / weight; |
96 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
97 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y); |
98 | 0 | float sum = 0.0f; |
99 | 0 | for (int j = minx; j <= maxx; ++j) { |
100 | 0 | sum += row_in[j] * kernel[j - x + offset]; |
101 | 0 | } |
102 | 0 | row_out[y] = sum * scale; |
103 | 0 | } |
104 | 0 | } |
105 | | |
106 | | // Computes a horizontal convolution and transposes the result. |
107 | | Status ConvolutionWithTranspose(const ImageF& in, |
108 | | const std::vector<float>& kernel, |
109 | 0 | ImageF* BUTTERAUGLI_RESTRICT out) { |
110 | 0 | JXL_ENSURE(out->xsize() == in.ysize()); |
111 | 0 | JXL_ENSURE(out->ysize() == in.xsize()); |
112 | 0 | const size_t len = kernel.size(); |
113 | 0 | const size_t offset = len / 2; |
114 | 0 | float weight_no_border = 0.0f; |
115 | 0 | for (size_t j = 0; j < len; ++j) { |
116 | 0 | weight_no_border += kernel[j]; |
117 | 0 | } |
118 | 0 | const float scale_no_border = 1.0f / weight_no_border; |
119 | 0 | const size_t border1 = std::min(in.xsize(), offset); |
120 | 0 | const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0; |
121 | 0 | std::vector<float> scaled_kernel(len / 2 + 1); |
122 | 0 | for (size_t i = 0; i <= len / 2; ++i) { |
123 | 0 | scaled_kernel[i] = kernel[i] * scale_no_border; |
124 | 0 | } |
125 | | |
126 | | // middle |
127 | 0 | switch (len) { |
128 | 0 | case 7: { |
129 | 0 | const float sk0 = scaled_kernel[0]; |
130 | 0 | const float sk1 = scaled_kernel[1]; |
131 | 0 | const float sk2 = scaled_kernel[2]; |
132 | 0 | const float sk3 = scaled_kernel[3]; |
133 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
134 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
135 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
136 | 0 | const float sum0 = (row_in[0] + row_in[6]) * sk0; |
137 | 0 | const float sum1 = (row_in[1] + row_in[5]) * sk1; |
138 | 0 | const float sum2 = (row_in[2] + row_in[4]) * sk2; |
139 | 0 | const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2; |
140 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
141 | 0 | row_out[y] = sum; |
142 | 0 | } |
143 | 0 | } |
144 | 0 | } break; |
145 | 0 | case 13: { |
146 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
147 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
148 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
149 | 0 | float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0]; |
150 | 0 | float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1]; |
151 | 0 | float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2]; |
152 | 0 | float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3]; |
153 | 0 | sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4]; |
154 | 0 | sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5]; |
155 | 0 | const float sum = (row_in[6]) * scaled_kernel[6]; |
156 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
157 | 0 | row_out[y] = sum + sum0 + sum1 + sum2 + sum3; |
158 | 0 | } |
159 | 0 | } |
160 | 0 | break; |
161 | 0 | } |
162 | 0 | case 15: { |
163 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
164 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
165 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
166 | 0 | float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0]; |
167 | 0 | float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1]; |
168 | 0 | float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2]; |
169 | 0 | float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3]; |
170 | 0 | sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4]; |
171 | 0 | sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5]; |
172 | 0 | sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6]; |
173 | 0 | const float sum = (row_in[7]) * scaled_kernel[7]; |
174 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
175 | 0 | row_out[y] = sum + sum0 + sum1 + sum2 + sum3; |
176 | 0 | } |
177 | 0 | } |
178 | 0 | break; |
179 | 0 | } |
180 | 0 | case 33: { |
181 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
182 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; |
183 | 0 | for (size_t x = border1; x < border2; ++x, ++row_in) { |
184 | 0 | float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0]; |
185 | 0 | float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1]; |
186 | 0 | float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2]; |
187 | 0 | float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3]; |
188 | 0 | sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4]; |
189 | 0 | sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5]; |
190 | 0 | sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6]; |
191 | 0 | sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7]; |
192 | 0 | sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8]; |
193 | 0 | sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9]; |
194 | 0 | sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10]; |
195 | 0 | sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11]; |
196 | 0 | sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12]; |
197 | 0 | sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13]; |
198 | 0 | sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14]; |
199 | 0 | sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15]; |
200 | 0 | const float sum = (row_in[16]) * scaled_kernel[16]; |
201 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); |
202 | 0 | row_out[y] = sum + sum0 + sum1 + sum2 + sum3; |
203 | 0 | } |
204 | 0 | } |
205 | 0 | break; |
206 | 0 | } |
207 | 0 | default: |
208 | 0 | return JXL_UNREACHABLE("kernel size %d not implemented", |
209 | 0 | static_cast<int>(len)); |
210 | 0 | } |
211 | | // left border |
212 | 0 | for (size_t x = 0; x < border1; ++x) { |
213 | 0 | ConvolveBorderColumn(in, kernel, x, out->Row(x)); |
214 | 0 | } |
215 | | |
216 | | // right border |
217 | 0 | for (size_t x = border2; x < in.xsize(); ++x) { |
218 | 0 | ConvolveBorderColumn(in, kernel, x, out->Row(x)); |
219 | 0 | } |
220 | 0 | return true; |
221 | 0 | } |
222 | | |
223 | | // A blur somewhat similar to a 2D Gaussian blur. |
224 | | // See: https://en.wikipedia.org/wiki/Gaussian_blur |
225 | | // |
226 | | // This is a bottleneck because the sigma can be quite large (>7). We can use |
227 | | // gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated |
228 | | // Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are |
229 | | // zero-valued. This leads to noticeable differences at the edges of diffmaps. |
230 | | // We retain a special case for 5x5 kernels (even faster than gauss_blur), |
231 | | // optionally use gauss_blur followed by fixup of the borders for large images, |
232 | | // or fall back to the previous truncated FIR followed by a transpose. |
233 | | Status Blur(const ImageF& in, float sigma, const ButteraugliParams& params, |
234 | 0 | BlurTemp* temp, ImageF* out) { |
235 | 0 | std::vector<float> kernel = ComputeKernel(sigma); |
236 | | // Separable5 does an in-place convolution, so this fast path is not safe if |
237 | | // in aliases out. |
238 | 0 | if (kernel.size() == 5 && &in != out) { |
239 | 0 | float sum_weights = 0.0f; |
240 | 0 | for (const float w : kernel) { |
241 | 0 | sum_weights += w; |
242 | 0 | } |
243 | 0 | const float scale = 1.0f / sum_weights; |
244 | 0 | const float w0 = kernel[2] * scale; |
245 | 0 | const float w1 = kernel[1] * scale; |
246 | 0 | const float w2 = kernel[0] * scale; |
247 | 0 | const WeightsSeparable5 weights = { |
248 | 0 | {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, |
249 | 0 | {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, |
250 | 0 | }; |
251 | 0 | JXL_RETURN_IF_ERROR( |
252 | 0 | Separable5(in, Rect(in), weights, /*pool=*/nullptr, out)); |
253 | 0 | return true; |
254 | 0 | } |
255 | | |
256 | 0 | ImageF* temp_t; |
257 | 0 | JXL_RETURN_IF_ERROR(temp->GetTransposed(in, &temp_t)); |
258 | 0 | JXL_RETURN_IF_ERROR(ConvolutionWithTranspose(in, kernel, temp_t)); |
259 | 0 | JXL_RETURN_IF_ERROR(ConvolutionWithTranspose(*temp_t, kernel, out)); |
260 | 0 | return true; |
261 | 0 | } |
262 | | |
263 | | // Allows PaddedMaltaUnit to call either function via overloading. |
264 | | struct MaltaTagLF {}; |
265 | | struct MaltaTag {}; |
266 | | |
267 | | } // namespace jxl |
268 | | |
269 | | #endif // JXL_BUTTERAUGLI_ONCE |
270 | | |
271 | | #include <hwy/highway.h> |
272 | | HWY_BEFORE_NAMESPACE(); |
273 | | namespace jxl { |
274 | | namespace HWY_NAMESPACE { |
275 | | |
276 | | // These templates are not found via ADL. |
277 | | using hwy::HWY_NAMESPACE::Abs; |
278 | | using hwy::HWY_NAMESPACE::Div; |
279 | | using hwy::HWY_NAMESPACE::Gt; |
280 | | using hwy::HWY_NAMESPACE::IfThenElse; |
281 | | using hwy::HWY_NAMESPACE::IfThenElseZero; |
282 | | using hwy::HWY_NAMESPACE::Lt; |
283 | | using hwy::HWY_NAMESPACE::Max; |
284 | | using hwy::HWY_NAMESPACE::Mul; |
285 | | using hwy::HWY_NAMESPACE::MulAdd; |
286 | | using hwy::HWY_NAMESPACE::MulSub; |
287 | | using hwy::HWY_NAMESPACE::Neg; |
288 | | using hwy::HWY_NAMESPACE::Sub; |
289 | | using hwy::HWY_NAMESPACE::Vec; |
290 | | using hwy::HWY_NAMESPACE::ZeroIfNegative; |
291 | | |
292 | | template <class D, class V> |
293 | 0 | HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) { |
294 | 0 | static const double kMul = 0.724216145665; |
295 | 0 | const V mul = Set(d, kMul); |
296 | 0 | const V maxval = Set(d, kMaxVal); |
297 | | // If greater than maxval or less than -maxval, replace with if_*. |
298 | 0 | const V if_pos = MulAdd(Sub(v, maxval), mul, maxval); |
299 | 0 | const V if_neg = MulSub(Add(v, maxval), mul, maxval); |
300 | 0 | const V pos_or_v = IfThenElse(Ge(v, maxval), if_pos, v); |
301 | 0 | return IfThenElse(Lt(v, Neg(maxval)), if_neg, pos_or_v); |
302 | 0 | } |
303 | | |
304 | | // Make area around zero less important (remove it). |
305 | | template <class D, class V> |
306 | 0 | HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) { |
307 | 0 | const auto w = Set(d, kw); |
308 | 0 | return IfThenElse(Gt(x, w), Sub(x, w), |
309 | 0 | IfThenElseZero(Lt(x, Neg(w)), Add(x, w))); |
310 | 0 | } |
311 | | |
312 | | // Make area around zero more important (2x it until the limit). |
313 | | template <class D, class V> |
314 | 0 | HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) { |
315 | 0 | const auto w = Set(d, kw); |
316 | 0 | return IfThenElse(Gt(x, w), Add(x, w), |
317 | 0 | IfThenElse(Lt(x, Neg(w)), Sub(x, w), Add(x, x))); |
318 | 0 | } |
319 | | |
320 | | // XybLowFreqToVals converts from low-frequency XYB space to the 'vals' space. |
321 | | // Vals space can be converted to L2-norm space (Euclidean and normalized) |
322 | | // through visual masking. |
323 | | template <class D, class V> |
324 | | HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y, |
325 | | const V& b_arg, V* HWY_RESTRICT valx, |
326 | 0 | V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) { |
327 | 0 | static const double xmul_scalar = 33.832837186260; |
328 | 0 | static const double ymul_scalar = 14.458268100570; |
329 | 0 | static const double bmul_scalar = 49.87984651440; |
330 | 0 | static const double y_to_b_mul_scalar = -0.362267051518; |
331 | 0 | const V xmul = Set(d, xmul_scalar); |
332 | 0 | const V ymul = Set(d, ymul_scalar); |
333 | 0 | const V bmul = Set(d, bmul_scalar); |
334 | 0 | const V y_to_b_mul = Set(d, y_to_b_mul_scalar); |
335 | 0 | const V b = MulAdd(y_to_b_mul, y, b_arg); |
336 | 0 | *valb = Mul(b, bmul); |
337 | 0 | *valx = Mul(x, xmul); |
338 | 0 | *valy = Mul(y, ymul); |
339 | 0 | } |
340 | | |
341 | 0 | void XybLowFreqToVals(Image3F* xyb_lf) { |
342 | | // Modify range around zero code only concerns the high frequency |
343 | | // planes and only the X and Y channels. |
344 | | // Convert low freq xyb to vals space so that we can do a simple squared sum |
345 | | // diff on the low frequencies later. |
346 | 0 | const HWY_FULL(float) d; |
347 | 0 | for (size_t y = 0; y < xyb_lf->ysize(); ++y) { |
348 | 0 | float* BUTTERAUGLI_RESTRICT row_x = xyb_lf->PlaneRow(0, y); |
349 | 0 | float* BUTTERAUGLI_RESTRICT row_y = xyb_lf->PlaneRow(1, y); |
350 | 0 | float* BUTTERAUGLI_RESTRICT row_b = xyb_lf->PlaneRow(2, y); |
351 | 0 | for (size_t x = 0; x < xyb_lf->xsize(); x += Lanes(d)) { |
352 | 0 | auto valx = Undefined(d); |
353 | 0 | auto valy = Undefined(d); |
354 | 0 | auto valb = Undefined(d); |
355 | 0 | XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x), |
356 | 0 | Load(d, row_b + x), &valx, &valy, &valb); |
357 | 0 | Store(valx, d, row_x + x); |
358 | 0 | Store(valy, d, row_y + x); |
359 | 0 | Store(valb, d, row_b + x); |
360 | 0 | } |
361 | 0 | } |
362 | 0 | } |
363 | | |
364 | 0 | Status SuppressXByY(const ImageF& in_y, ImageF* HWY_RESTRICT inout_x) { |
365 | 0 | JXL_ENSURE(SameSize(*inout_x, in_y)); |
366 | 0 | const size_t xsize = in_y.xsize(); |
367 | 0 | const size_t ysize = in_y.ysize(); |
368 | 0 | const HWY_FULL(float) d; |
369 | 0 | static const double suppress = 46.0; |
370 | 0 | static const double s = 0.653020556257; |
371 | 0 | const auto sv = Set(d, s); |
372 | 0 | const auto one_minus_s = Set(d, 1.0 - s); |
373 | 0 | const auto ywv = Set(d, suppress); |
374 | |
|
375 | 0 | for (size_t y = 0; y < ysize; ++y) { |
376 | 0 | const float* HWY_RESTRICT row_y = in_y.ConstRow(y); |
377 | 0 | float* HWY_RESTRICT row_x = inout_x->Row(y); |
378 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
379 | 0 | const auto vx = Load(d, row_x + x); |
380 | 0 | const auto vy = Load(d, row_y + x); |
381 | 0 | const auto scaler = |
382 | 0 | MulAdd(Div(ywv, MulAdd(vy, vy, ywv)), one_minus_s, sv); |
383 | 0 | Store(Mul(scaler, vx), d, row_x + x); |
384 | 0 | } |
385 | 0 | } |
386 | 0 | return true; |
387 | 0 | } |
388 | | |
389 | 0 | void Subtract(const ImageF& a, const ImageF& b, ImageF* c) { |
390 | 0 | const HWY_FULL(float) d; |
391 | 0 | for (size_t y = 0; y < a.ysize(); ++y) { |
392 | 0 | const float* row_a = a.ConstRow(y); |
393 | 0 | const float* row_b = b.ConstRow(y); |
394 | 0 | float* row_c = c->Row(y); |
395 | 0 | for (size_t x = 0; x < a.xsize(); x += Lanes(d)) { |
396 | 0 | Store(Sub(Load(d, row_a + x), Load(d, row_b + x)), d, row_c + x); |
397 | 0 | } |
398 | 0 | } |
399 | 0 | } |
400 | | |
401 | | Status SeparateLFAndMF(const ButteraugliParams& params, const Image3F& xyb, |
402 | 0 | Image3F* lf, Image3F* mf, BlurTemp* blur_temp) { |
403 | 0 | static const double kSigmaLf = 7.15593339443; |
404 | 0 | for (int i = 0; i < 3; ++i) { |
405 | | // Extract lf ... |
406 | 0 | JXL_RETURN_IF_ERROR( |
407 | 0 | Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &lf->Plane(i))); |
408 | | // ... and keep everything else in mf. |
409 | 0 | Subtract(xyb.Plane(i), lf->Plane(i), &mf->Plane(i)); |
410 | 0 | } |
411 | 0 | XybLowFreqToVals(lf); |
412 | 0 | return true; |
413 | 0 | } |
414 | | |
415 | | Status SeparateMFAndHF(const ButteraugliParams& params, Image3F* mf, ImageF* hf, |
416 | 0 | BlurTemp* blur_temp) { |
417 | 0 | const HWY_FULL(float) d; |
418 | 0 | static const double kSigmaHf = 3.22489901262; |
419 | 0 | const size_t xsize = mf->xsize(); |
420 | 0 | const size_t ysize = mf->ysize(); |
421 | 0 | JxlMemoryManager* memory_manager = mf[0].memory_manager(); |
422 | 0 | JXL_ASSIGN_OR_RETURN(hf[0], ImageF::Create(memory_manager, xsize, ysize)); |
423 | 0 | JXL_ASSIGN_OR_RETURN(hf[1], ImageF::Create(memory_manager, xsize, ysize)); |
424 | 0 | for (int i = 0; i < 3; ++i) { |
425 | 0 | if (i == 2) { |
426 | 0 | JXL_RETURN_IF_ERROR( |
427 | 0 | Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i))); |
428 | 0 | break; |
429 | 0 | } |
430 | 0 | for (size_t y = 0; y < ysize; ++y) { |
431 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(i, y); |
432 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y); |
433 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
434 | 0 | Store(Load(d, row_mf + x), d, row_hf + x); |
435 | 0 | } |
436 | 0 | } |
437 | 0 | JXL_RETURN_IF_ERROR( |
438 | 0 | Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i))); |
439 | 0 | static const double kRemoveMfRange = 0.29; |
440 | 0 | static const double kAddMfRange = 0.1; |
441 | 0 | if (i == 0) { |
442 | 0 | for (size_t y = 0; y < ysize; ++y) { |
443 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(0, y); |
444 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y); |
445 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
446 | 0 | auto mf = Load(d, row_mf + x); |
447 | 0 | auto hf = Sub(Load(d, row_hf + x), mf); |
448 | 0 | mf = RemoveRangeAroundZero(d, kRemoveMfRange, mf); |
449 | 0 | Store(mf, d, row_mf + x); |
450 | 0 | Store(hf, d, row_hf + x); |
451 | 0 | } |
452 | 0 | } |
453 | 0 | } else { |
454 | 0 | for (size_t y = 0; y < ysize; ++y) { |
455 | 0 | float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(1, y); |
456 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y); |
457 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
458 | 0 | auto mf = Load(d, row_mf + x); |
459 | 0 | auto hf = Sub(Load(d, row_hf + x), mf); |
460 | |
|
461 | 0 | mf = AmplifyRangeAroundZero(d, kAddMfRange, mf); |
462 | 0 | Store(mf, d, row_mf + x); |
463 | 0 | Store(hf, d, row_hf + x); |
464 | 0 | } |
465 | 0 | } |
466 | 0 | } |
467 | 0 | } |
468 | | // Suppress red-green by intensity change in the high freq channels. |
469 | 0 | JXL_RETURN_IF_ERROR(SuppressXByY(hf[1], &hf[0])); |
470 | 0 | return true; |
471 | 0 | } |
472 | | |
473 | | Status SeparateHFAndUHF(const ButteraugliParams& params, ImageF* hf, |
474 | 0 | ImageF* uhf, BlurTemp* blur_temp) { |
475 | 0 | const HWY_FULL(float) d; |
476 | 0 | const size_t xsize = hf[0].xsize(); |
477 | 0 | const size_t ysize = hf[0].ysize(); |
478 | 0 | JxlMemoryManager* memory_manager = hf[0].memory_manager(); |
479 | 0 | static const double kSigmaUhf = 1.56416327805; |
480 | 0 | JXL_ASSIGN_OR_RETURN(uhf[0], ImageF::Create(memory_manager, xsize, ysize)); |
481 | 0 | JXL_ASSIGN_OR_RETURN(uhf[1], ImageF::Create(memory_manager, xsize, ysize)); |
482 | 0 | for (int i = 0; i < 2; ++i) { |
483 | | // Divide hf into hf and uhf. |
484 | 0 | for (size_t y = 0; y < ysize; ++y) { |
485 | 0 | float* BUTTERAUGLI_RESTRICT row_uhf = uhf[i].Row(y); |
486 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y); |
487 | 0 | for (size_t x = 0; x < xsize; ++x) { |
488 | 0 | row_uhf[x] = row_hf[x]; |
489 | 0 | } |
490 | 0 | } |
491 | 0 | JXL_RETURN_IF_ERROR(Blur(hf[i], kSigmaUhf, params, blur_temp, &hf[i])); |
492 | 0 | static const double kRemoveHfRange = 1.5; |
493 | 0 | static const double kAddHfRange = 0.132; |
494 | 0 | static const double kRemoveUhfRange = 0.04; |
495 | 0 | static const double kMaxclampHf = 28.4691806922; |
496 | 0 | static const double kMaxclampUhf = 5.19175294647; |
497 | 0 | static double kMulYHf = 2.155; |
498 | 0 | static double kMulYUhf = 2.69313763794; |
499 | 0 | if (i == 0) { |
500 | 0 | for (size_t y = 0; y < ysize; ++y) { |
501 | 0 | float* BUTTERAUGLI_RESTRICT row_uhf = uhf[0].Row(y); |
502 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y); |
503 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
504 | 0 | auto hf = Load(d, row_hf + x); |
505 | 0 | auto uhf = Sub(Load(d, row_uhf + x), hf); |
506 | 0 | hf = RemoveRangeAroundZero(d, kRemoveHfRange, hf); |
507 | 0 | uhf = RemoveRangeAroundZero(d, kRemoveUhfRange, uhf); |
508 | 0 | Store(hf, d, row_hf + x); |
509 | 0 | Store(uhf, d, row_uhf + x); |
510 | 0 | } |
511 | 0 | } |
512 | 0 | } else { |
513 | 0 | for (size_t y = 0; y < ysize; ++y) { |
514 | 0 | float* BUTTERAUGLI_RESTRICT row_uhf = uhf[1].Row(y); |
515 | 0 | float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y); |
516 | 0 | for (size_t x = 0; x < xsize; x += Lanes(d)) { |
517 | 0 | auto hf = Load(d, row_hf + x); |
518 | 0 | hf = MaximumClamp(d, hf, kMaxclampHf); |
519 | |
|
520 | 0 | auto uhf = Sub(Load(d, row_uhf + x), hf); |
521 | 0 | uhf = MaximumClamp(d, uhf, kMaxclampUhf); |
522 | 0 | uhf = Mul(uhf, Set(d, kMulYUhf)); |
523 | 0 | Store(uhf, d, row_uhf + x); |
524 | |
|
525 | 0 | hf = Mul(hf, Set(d, kMulYHf)); |
526 | 0 | hf = AmplifyRangeAroundZero(d, kAddHfRange, hf); |
527 | 0 | Store(hf, d, row_hf + x); |
528 | 0 | } |
529 | 0 | } |
530 | 0 | } |
531 | 0 | } |
532 | 0 | return true; |
533 | 0 | } |
534 | | |
535 | 0 | void DeallocateHFAndUHF(ImageF* hf, ImageF* uhf) { |
536 | 0 | for (int i = 0; i < 2; ++i) { |
537 | 0 | hf[i] = ImageF(); |
538 | 0 | uhf[i] = ImageF(); |
539 | 0 | } |
540 | 0 | } |
541 | | |
542 | | Status SeparateFrequencies(size_t xsize, size_t ysize, |
543 | | const ButteraugliParams& params, BlurTemp* blur_temp, |
544 | 0 | const Image3F& xyb, PsychoImage& ps) { |
545 | 0 | JxlMemoryManager* memory_manager = xyb.memory_manager(); |
546 | 0 | JXL_ASSIGN_OR_RETURN( |
547 | 0 | ps.lf, Image3F::Create(memory_manager, xyb.xsize(), xyb.ysize())); |
548 | 0 | JXL_ASSIGN_OR_RETURN( |
549 | 0 | ps.mf, Image3F::Create(memory_manager, xyb.xsize(), xyb.ysize())); |
550 | 0 | JXL_RETURN_IF_ERROR(SeparateLFAndMF(params, xyb, &ps.lf, &ps.mf, blur_temp)); |
551 | 0 | JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &ps.mf, &ps.hf[0], blur_temp)); |
552 | 0 | JXL_RETURN_IF_ERROR( |
553 | 0 | SeparateHFAndUHF(params, &ps.hf[0], &ps.uhf[0], blur_temp)); |
554 | 0 | return true; |
555 | 0 | } |
556 | | |
557 | | namespace { |
558 | | template <typename V> |
559 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d) { |
560 | 0 | return Add(Add(a, b), Add(c, d)); |
561 | 0 | } |
562 | | template <typename V> |
563 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e) { |
564 | 0 | return Sum(a, b, c, Add(d, e)); |
565 | 0 | } |
566 | | template <typename V> |
567 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g) { |
568 | 0 | return Sum(a, b, c, Sum(d, e, f, g)); |
569 | 0 | } |
570 | | template <typename V> |
571 | 0 | BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g, V h, V i) { |
572 | 0 | return Add(Add(Sum(a, b, c, d), Sum(e, f, g, h)), i); |
573 | 0 | } |
574 | | } // namespace |
575 | | |
576 | | template <class D> |
577 | | Vec<D> MaltaUnit(MaltaTagLF /*tag*/, const D df, |
578 | 0 | const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { |
579 | 0 | const intptr_t xs3 = 3 * xs; |
580 | |
|
581 | 0 | const auto center = LoadU(df, d); |
582 | | |
583 | | // x grows, y constant |
584 | 0 | const auto sum_yconst = Sum(LoadU(df, d - 4), LoadU(df, d - 2), center, |
585 | 0 | LoadU(df, d + 2), LoadU(df, d + 4)); |
586 | | // Will return this, sum of all line kernels |
587 | 0 | auto retval = Mul(sum_yconst, sum_yconst); |
588 | 0 | { |
589 | | // y grows, x constant |
590 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs - xs), center, |
591 | 0 | LoadU(df, d + xs + xs), LoadU(df, d + xs3 + xs)); |
592 | 0 | retval = MulAdd(sum, sum, retval); |
593 | 0 | } |
594 | 0 | { |
595 | | // both grow |
596 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), center, |
597 | 0 | LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); |
598 | 0 | retval = MulAdd(sum, sum, retval); |
599 | 0 | } |
600 | 0 | { |
601 | | // y grows, x shrinks |
602 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), center, |
603 | 0 | LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); |
604 | 0 | retval = MulAdd(sum, sum, retval); |
605 | 0 | } |
606 | 0 | { |
607 | | // y grows -4 to 4, x shrinks 1 -> -1 |
608 | 0 | auto sum = |
609 | 0 | Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs - xs + 1), center, |
610 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 1)); |
611 | 0 | retval = MulAdd(sum, sum, retval); |
612 | 0 | } |
613 | 0 | { |
614 | | // y grows -4 to 4, x grows -1 -> 1 |
615 | 0 | auto sum = |
616 | 0 | Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs - xs - 1), center, |
617 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 1)); |
618 | 0 | retval = MulAdd(sum, sum, retval); |
619 | 0 | } |
620 | 0 | { |
621 | | // x grows -4 to 4, y grows -1 to 1 |
622 | 0 | auto sum = Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 2 - xs), center, |
623 | 0 | LoadU(df, d + 2 + xs), LoadU(df, d + 4 + xs)); |
624 | 0 | retval = MulAdd(sum, sum, retval); |
625 | 0 | } |
626 | 0 | { |
627 | | // x grows -4 to 4, y shrinks 1 to -1 |
628 | 0 | auto sum = Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 2 + xs), center, |
629 | 0 | LoadU(df, d + 2 - xs), LoadU(df, d + 4 - xs)); |
630 | 0 | retval = MulAdd(sum, sum, retval); |
631 | 0 | } |
632 | 0 | { |
633 | | /* 0_________ |
634 | | 1__*______ |
635 | | 2___*_____ |
636 | | 3_________ |
637 | | 4____0____ |
638 | | 5_________ |
639 | | 6_____*___ |
640 | | 7______*__ |
641 | | 8_________ */ |
642 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), center, |
643 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); |
644 | 0 | retval = MulAdd(sum, sum, retval); |
645 | 0 | } |
646 | 0 | { |
647 | | /* 0_________ |
648 | | 1______*__ |
649 | | 2_____*___ |
650 | | 3_________ |
651 | | 4____0____ |
652 | | 5_________ |
653 | | 6___*_____ |
654 | | 7__*______ |
655 | | 8_________ */ |
656 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), center, |
657 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); |
658 | 0 | retval = MulAdd(sum, sum, retval); |
659 | 0 | } |
660 | 0 | { |
661 | | /* 0_________ |
662 | | 1_________ |
663 | | 2_*_______ |
664 | | 3__*______ |
665 | | 4____0____ |
666 | | 5______*__ |
667 | | 6_______*_ |
668 | | 7_________ |
669 | | 8_________ */ |
670 | 0 | auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), center, |
671 | 0 | LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); |
672 | 0 | retval = MulAdd(sum, sum, retval); |
673 | 0 | } |
674 | 0 | { |
675 | | /* 0_________ |
676 | | 1_________ |
677 | | 2_______*_ |
678 | | 3______*__ |
679 | | 4____0____ |
680 | | 5__*______ |
681 | | 6_*_______ |
682 | | 7_________ |
683 | | 8_________ */ |
684 | 0 | auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), center, |
685 | 0 | LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); |
686 | 0 | retval = MulAdd(sum, sum, retval); |
687 | 0 | } |
688 | 0 | { |
689 | | /* 0_________ |
690 | | 1_________ |
691 | | 2________* |
692 | | 3______*__ |
693 | | 4____0____ |
694 | | 5__*______ |
695 | | 6*________ |
696 | | 7_________ |
697 | | 8_________ */ |
698 | |
|
699 | 0 | auto sum = Sum(LoadU(df, d + xs + xs - 4), LoadU(df, d + xs - 2), center, |
700 | 0 | LoadU(df, d - xs + 2), LoadU(df, d - xs - xs + 4)); |
701 | 0 | retval = MulAdd(sum, sum, retval); |
702 | 0 | } |
703 | 0 | { |
704 | | /* 0_________ |
705 | | 1_________ |
706 | | 2*________ |
707 | | 3__*______ |
708 | | 4____0____ |
709 | | 5______*__ |
710 | | 6________* |
711 | | 7_________ |
712 | | 8_________ */ |
713 | 0 | auto sum = Sum(LoadU(df, d - xs - xs - 4), LoadU(df, d - xs - 2), center, |
714 | 0 | LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 4)); |
715 | 0 | retval = MulAdd(sum, sum, retval); |
716 | 0 | } |
717 | 0 | { |
718 | | /* 0__*______ |
719 | | 1_________ |
720 | | 2___*_____ |
721 | | 3_________ |
722 | | 4____0____ |
723 | | 5_________ |
724 | | 6_____*___ |
725 | | 7_________ |
726 | | 8______*__ */ |
727 | 0 | auto sum = |
728 | 0 | Sum(LoadU(df, d - xs3 - xs - 2), LoadU(df, d - xs - xs - 1), center, |
729 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 2)); |
730 | 0 | retval = MulAdd(sum, sum, retval); |
731 | 0 | } |
732 | 0 | { |
733 | | /* 0______*__ |
734 | | 1_________ |
735 | | 2_____*___ |
736 | | 3_________ |
737 | | 4____0____ |
738 | | 5_________ |
739 | | 6___*_____ |
740 | | 7_________ |
741 | | 8__*______ */ |
742 | 0 | auto sum = |
743 | 0 | Sum(LoadU(df, d - xs3 - xs + 2), LoadU(df, d - xs - xs + 1), center, |
744 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 2)); |
745 | 0 | retval = MulAdd(sum, sum, retval); |
746 | 0 | } |
747 | 0 | return retval; |
748 | 0 | } |
749 | | |
750 | | template <class D> |
751 | | Vec<D> MaltaUnit(MaltaTag /*tag*/, const D df, |
752 | 0 | const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { |
753 | 0 | const intptr_t xs3 = 3 * xs; |
754 | |
|
755 | 0 | const auto center = LoadU(df, d); |
756 | | |
757 | | // x grows, y constant |
758 | 0 | const auto sum_yconst = |
759 | 0 | Sum(LoadU(df, d - 4), LoadU(df, d - 3), LoadU(df, d - 2), |
760 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2), |
761 | 0 | LoadU(df, d + 3), LoadU(df, d + 4)); |
762 | | // Will return this, sum of all line kernels |
763 | 0 | auto retval = Mul(sum_yconst, sum_yconst); |
764 | |
|
765 | 0 | { |
766 | | // y grows, x constant |
767 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs3), |
768 | 0 | LoadU(df, d - xs - xs), LoadU(df, d - xs), center, |
769 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs), |
770 | 0 | LoadU(df, d + xs3), LoadU(df, d + xs3 + xs)); |
771 | 0 | retval = MulAdd(sum, sum, retval); |
772 | 0 | } |
773 | 0 | { |
774 | | // both grow |
775 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), |
776 | 0 | LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), |
777 | 0 | LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); |
778 | 0 | retval = MulAdd(sum, sum, retval); |
779 | 0 | } |
780 | 0 | { |
781 | | // y grows, x shrinks |
782 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), |
783 | 0 | LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), |
784 | 0 | LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); |
785 | 0 | retval = MulAdd(sum, sum, retval); |
786 | 0 | } |
787 | 0 | { |
788 | | // y grows -4 to 4, x shrinks 1 -> -1 |
789 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), |
790 | 0 | LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, |
791 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), |
792 | 0 | LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); |
793 | 0 | retval = MulAdd(sum, sum, retval); |
794 | 0 | } |
795 | 0 | { |
796 | | // y grows -4 to 4, x grows -1 -> 1 |
797 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), |
798 | 0 | LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, |
799 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), |
800 | 0 | LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); |
801 | 0 | retval = MulAdd(sum, sum, retval); |
802 | 0 | } |
803 | 0 | { |
804 | | // x grows -4 to 4, y grows -1 to 1 |
805 | 0 | auto sum = |
806 | 0 | Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 3 - xs), LoadU(df, d - 2 - xs), |
807 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 + xs), |
808 | 0 | LoadU(df, d + 3 + xs), LoadU(df, d + 4 + xs)); |
809 | 0 | retval = MulAdd(sum, sum, retval); |
810 | 0 | } |
811 | 0 | { |
812 | | // x grows -4 to 4, y shrinks 1 to -1 |
813 | 0 | auto sum = |
814 | 0 | Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 3 + xs), LoadU(df, d - 2 + xs), |
815 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 - xs), |
816 | 0 | LoadU(df, d + 3 - xs), LoadU(df, d + 4 - xs)); |
817 | 0 | retval = MulAdd(sum, sum, retval); |
818 | 0 | } |
819 | 0 | { |
820 | | /* 0_________ |
821 | | 1__*______ |
822 | | 2___*_____ |
823 | | 3___*_____ |
824 | | 4____0____ |
825 | | 5_____*___ |
826 | | 6_____*___ |
827 | | 7______*__ |
828 | | 8_________ */ |
829 | 0 | auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), |
830 | 0 | LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), |
831 | 0 | LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); |
832 | 0 | retval = MulAdd(sum, sum, retval); |
833 | 0 | } |
834 | 0 | { |
835 | | /* 0_________ |
836 | | 1______*__ |
837 | | 2_____*___ |
838 | | 3_____*___ |
839 | | 4____0____ |
840 | | 5___*_____ |
841 | | 6___*_____ |
842 | | 7__*______ |
843 | | 8_________ */ |
844 | 0 | auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), |
845 | 0 | LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), |
846 | 0 | LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); |
847 | 0 | retval = MulAdd(sum, sum, retval); |
848 | 0 | } |
849 | 0 | { |
850 | | /* 0_________ |
851 | | 1_________ |
852 | | 2_*_______ |
853 | | 3__**_____ |
854 | | 4____0____ |
855 | | 5_____**__ |
856 | | 6_______*_ |
857 | | 7_________ |
858 | | 8_________ */ |
859 | 0 | auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), |
860 | 0 | LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), |
861 | 0 | LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); |
862 | 0 | retval = MulAdd(sum, sum, retval); |
863 | 0 | } |
864 | 0 | { |
865 | | /* 0_________ |
866 | | 1_________ |
867 | | 2_______*_ |
868 | | 3_____**__ |
869 | | 4____0____ |
870 | | 5__**_____ |
871 | | 6_*_______ |
872 | | 7_________ |
873 | | 8_________ */ |
874 | 0 | auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), |
875 | 0 | LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), |
876 | 0 | LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); |
877 | 0 | retval = MulAdd(sum, sum, retval); |
878 | 0 | } |
879 | 0 | { |
880 | | /* 0_________ |
881 | | 1_________ |
882 | | 2_________ |
883 | | 3______*** |
884 | | 4___*0*___ |
885 | | 5***______ |
886 | | 6_________ |
887 | | 7_________ |
888 | | 8_________ */ |
889 | |
|
890 | 0 | auto sum = |
891 | 0 | Sum(LoadU(df, d + xs - 4), LoadU(df, d + xs - 3), LoadU(df, d + xs - 2), |
892 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d - xs + 2), |
893 | 0 | LoadU(df, d - xs + 3), LoadU(df, d - xs + 4)); |
894 | 0 | retval = MulAdd(sum, sum, retval); |
895 | 0 | } |
896 | 0 | { |
897 | | /* 0_________ |
898 | | 1_________ |
899 | | 2_________ |
900 | | 3***______ |
901 | | 4___*0*___ |
902 | | 5______*** |
903 | | 6_________ |
904 | | 7_________ |
905 | | 8_________ */ |
906 | 0 | auto sum = |
907 | 0 | Sum(LoadU(df, d - xs - 4), LoadU(df, d - xs - 3), LoadU(df, d - xs - 2), |
908 | 0 | LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + xs + 2), |
909 | 0 | LoadU(df, d + xs + 3), LoadU(df, d + xs + 4)); |
910 | 0 | retval = MulAdd(sum, sum, retval); |
911 | 0 | } |
912 | 0 | { |
913 | | /* 0___*_____ |
914 | | 1___*_____ |
915 | | 2___*_____ |
916 | | 3____*____ |
917 | | 4____0____ |
918 | | 5____*____ |
919 | | 6_____*___ |
920 | | 7_____*___ |
921 | | 8_____*___ */ |
922 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), |
923 | 0 | LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, |
924 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), |
925 | 0 | LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); |
926 | 0 | retval = MulAdd(sum, sum, retval); |
927 | 0 | } |
928 | 0 | { |
929 | | /* 0_____*___ |
930 | | 1_____*___ |
931 | | 2____ *___ |
932 | | 3____*____ |
933 | | 4____0____ |
934 | | 5____*____ |
935 | | 6___*_____ |
936 | | 7___*_____ |
937 | | 8___*_____ */ |
938 | 0 | auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), |
939 | 0 | LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, |
940 | 0 | LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), |
941 | 0 | LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); |
942 | 0 | retval = MulAdd(sum, sum, retval); |
943 | 0 | } |
944 | 0 | return retval; |
945 | 0 | } |
946 | | |
947 | | // Returns MaltaUnit. Avoids bounds-checks when x0 and y0 are known |
948 | | // to be far enough from the image borders. "diffs" is a packed image. |
949 | | template <class Tag> |
950 | | static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs, |
951 | | const size_t x0, |
952 | 0 | const size_t y0) { |
953 | 0 | const float* BUTTERAUGLI_RESTRICT d = diffs.ConstRow(y0) + x0; |
954 | 0 | const HWY_CAPPED(float, 1) df; |
955 | 0 | if ((x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) && |
956 | 0 | y0 < (diffs.ysize() - 4))) { |
957 | 0 | return GetLane(MaltaUnit(Tag(), df, d, diffs.PixelsPerRow())); |
958 | 0 | } |
959 | | |
960 | 0 | float borderimage[12 * 9]; // round up to 4 |
961 | 0 | for (int dy = 0; dy < 9; ++dy) { |
962 | 0 | int y = y0 + dy - 4; |
963 | 0 | if (y < 0 || static_cast<size_t>(y) >= diffs.ysize()) { |
964 | 0 | for (int dx = 0; dx < 12; ++dx) { |
965 | 0 | borderimage[dy * 12 + dx] = 0.0f; |
966 | 0 | } |
967 | 0 | continue; |
968 | 0 | } |
969 | | |
970 | 0 | const float* row_diffs = diffs.ConstRow(y); |
971 | 0 | for (int dx = 0; dx < 9; ++dx) { |
972 | 0 | int x = x0 + dx - 4; |
973 | 0 | if (x < 0 || static_cast<size_t>(x) >= diffs.xsize()) { |
974 | 0 | borderimage[dy * 12 + dx] = 0.0f; |
975 | 0 | } else { |
976 | 0 | borderimage[dy * 12 + dx] = row_diffs[x]; |
977 | 0 | } |
978 | 0 | } |
979 | 0 | std::fill(borderimage + dy * 12 + 9, borderimage + dy * 12 + 12, 0.0f); |
980 | 0 | } |
981 | 0 | return GetLane(MaltaUnit(Tag(), df, &borderimage[4 * 12 + 4], 12)); |
982 | 0 | } Unexecuted instantiation: butteraugli.cc:float jxl::N_SCALAR::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long) Unexecuted instantiation: butteraugli.cc:float jxl::N_SCALAR::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long) |
983 | | |
984 | | template <class Tag> |
985 | | static Status MaltaDiffMapT(const Tag tag, const ImageF& lum0, |
986 | | const ImageF& lum1, const double w_0gt1, |
987 | | const double w_0lt1, const double norm1, |
988 | | const double len, const double mulli, |
989 | | ImageF* HWY_RESTRICT diffs, |
990 | 0 | ImageF* HWY_RESTRICT block_diff_ac) { |
991 | 0 | JXL_ENSURE(SameSize(lum0, lum1) && SameSize(lum0, *diffs)); |
992 | 0 | const size_t xsize_ = lum0.xsize(); |
993 | 0 | const size_t ysize_ = lum0.ysize(); |
994 | |
|
995 | 0 | const float kWeight0 = 0.5; |
996 | 0 | const float kWeight1 = 0.33; |
997 | |
|
998 | 0 | const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1); |
999 | 0 | const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1); |
1000 | 0 | const float norm2_0gt1 = w_pre0gt1 * norm1; |
1001 | 0 | const float norm2_0lt1 = w_pre0lt1 * norm1; |
1002 | |
|
1003 | 0 | for (size_t y = 0; y < ysize_; ++y) { |
1004 | 0 | const float* HWY_RESTRICT row0 = lum0.ConstRow(y); |
1005 | 0 | const float* HWY_RESTRICT row1 = lum1.ConstRow(y); |
1006 | 0 | float* HWY_RESTRICT row_diffs = diffs->Row(y); |
1007 | 0 | for (size_t x = 0; x < xsize_; ++x) { |
1008 | 0 | const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x])); |
1009 | 0 | const float diff = row0[x] - row1[x]; |
1010 | 0 | const float scaler = norm2_0gt1 / (static_cast<float>(norm1) + absval); |
1011 | | |
1012 | | // Primary symmetric quadratic objective. |
1013 | 0 | row_diffs[x] = scaler * diff; |
1014 | |
|
1015 | 0 | const float scaler2 = norm2_0lt1 / (static_cast<float>(norm1) + absval); |
1016 | 0 | const double fabs0 = std::fabs(row0[x]); |
1017 | | |
1018 | | // Secondary half-open quadratic objectives. |
1019 | 0 | const double too_small = 0.55 * fabs0; |
1020 | 0 | const double too_big = 1.05 * fabs0; |
1021 | |
|
1022 | 0 | if (row0[x] < 0) { |
1023 | 0 | if (row1[x] > -too_small) { |
1024 | 0 | double impact = scaler2 * (row1[x] + too_small); |
1025 | 0 | row_diffs[x] -= impact; |
1026 | 0 | } else if (row1[x] < -too_big) { |
1027 | 0 | double impact = scaler2 * (-row1[x] - too_big); |
1028 | 0 | row_diffs[x] += impact; |
1029 | 0 | } |
1030 | 0 | } else { |
1031 | 0 | if (row1[x] < too_small) { |
1032 | 0 | double impact = scaler2 * (too_small - row1[x]); |
1033 | 0 | row_diffs[x] += impact; |
1034 | 0 | } else if (row1[x] > too_big) { |
1035 | 0 | double impact = scaler2 * (row1[x] - too_big); |
1036 | 0 | row_diffs[x] -= impact; |
1037 | 0 | } |
1038 | 0 | } |
1039 | 0 | } |
1040 | 0 | } |
1041 | |
|
1042 | 0 | size_t y0 = 0; |
1043 | | // Top |
1044 | 0 | for (; y0 < 4; ++y0) { |
1045 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); |
1046 | 0 | for (size_t x0 = 0; x0 < xsize_; ++x0) { |
1047 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1048 | 0 | } |
1049 | 0 | } |
1050 | |
|
1051 | 0 | const HWY_FULL(float) df; |
1052 | 0 | const size_t aligned_x = std::max(static_cast<size_t>(4), Lanes(df)); |
1053 | 0 | const intptr_t stride = diffs->PixelsPerRow(); |
1054 | | |
1055 | | // Middle |
1056 | 0 | for (; y0 < ysize_ - 4; ++y0) { |
1057 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y0); |
1058 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); |
1059 | 0 | size_t x0 = 0; |
1060 | 0 | for (; x0 < aligned_x; ++x0) { |
1061 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1062 | 0 | } |
1063 | 0 | for (; x0 + Lanes(df) + 4 <= xsize_; x0 += Lanes(df)) { |
1064 | 0 | auto diff = Load(df, row_diff + x0); |
1065 | 0 | diff = Add(diff, MaltaUnit(Tag(), df, row_in + x0, stride)); |
1066 | 0 | Store(diff, df, row_diff + x0); |
1067 | 0 | } |
1068 | |
|
1069 | 0 | for (; x0 < xsize_; ++x0) { |
1070 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1071 | 0 | } |
1072 | 0 | } |
1073 | | |
1074 | | // Bottom |
1075 | 0 | for (; y0 < ysize_; ++y0) { |
1076 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); |
1077 | 0 | for (size_t x0 = 0; x0 < xsize_; ++x0) { |
1078 | 0 | row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); |
1079 | 0 | } |
1080 | 0 | } |
1081 | 0 | return true; |
1082 | 0 | } Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SCALAR::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SCALAR::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*) |
1083 | | |
1084 | | // Need non-template wrapper functions for HWY_EXPORT. |
1085 | | Status MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, |
1086 | | const double w_0lt1, const double norm1, |
1087 | | ImageF* HWY_RESTRICT diffs, |
1088 | 0 | ImageF* HWY_RESTRICT block_diff_ac) { |
1089 | 0 | const double len = 3.75; |
1090 | 0 | static const double mulli = 0.39905817637; |
1091 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1, |
1092 | 0 | norm1, len, mulli, diffs, block_diff_ac)); |
1093 | 0 | return true; |
1094 | 0 | } |
1095 | | |
1096 | | Status MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, |
1097 | | const double w_0gt1, const double w_0lt1, |
1098 | | const double norm1, ImageF* HWY_RESTRICT diffs, |
1099 | 0 | ImageF* HWY_RESTRICT block_diff_ac) { |
1100 | 0 | const double len = 3.75; |
1101 | 0 | static const double mulli = 0.611612573796; |
1102 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1, |
1103 | 0 | norm1, len, mulli, diffs, block_diff_ac)); |
1104 | 0 | return true; |
1105 | 0 | } |
1106 | | |
1107 | | void CombineChannelsForMasking(const ImageF* hf, const ImageF* uhf, |
1108 | 0 | ImageF* out) { |
1109 | | // Only X and Y components are involved in masking. B's influence |
1110 | | // is considered less important in the high frequency area, and we |
1111 | | // don't model masking from lower frequency signals. |
1112 | 0 | static const float muls[3] = { |
1113 | 0 | 2.5f, |
1114 | 0 | 0.4f, |
1115 | 0 | 0.4f, |
1116 | 0 | }; |
1117 | | // Silly and unoptimized approach here. TODO(jyrki): rework this. |
1118 | 0 | for (size_t y = 0; y < hf[0].ysize(); ++y) { |
1119 | 0 | const float* BUTTERAUGLI_RESTRICT row_y_hf = hf[1].Row(y); |
1120 | 0 | const float* BUTTERAUGLI_RESTRICT row_y_uhf = uhf[1].Row(y); |
1121 | 0 | const float* BUTTERAUGLI_RESTRICT row_x_hf = hf[0].Row(y); |
1122 | 0 | const float* BUTTERAUGLI_RESTRICT row_x_uhf = uhf[0].Row(y); |
1123 | 0 | float* BUTTERAUGLI_RESTRICT row = out->Row(y); |
1124 | 0 | for (size_t x = 0; x < hf[0].xsize(); ++x) { |
1125 | 0 | float xdiff = (row_x_uhf[x] + row_x_hf[x]) * muls[0]; |
1126 | 0 | float ydiff = row_y_uhf[x] * muls[1] + row_y_hf[x] * muls[2]; |
1127 | 0 | row[x] = xdiff * xdiff + ydiff * ydiff; |
1128 | 0 | row[x] = std::sqrt(row[x]); |
1129 | 0 | } |
1130 | 0 | } |
1131 | 0 | } |
1132 | | |
1133 | 0 | void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) { |
1134 | 0 | const size_t xsize = xyb.xsize(); |
1135 | 0 | const size_t ysize = xyb.ysize(); |
1136 | 0 | const float bias = mul * bias_arg; |
1137 | 0 | const float sqrt_bias = std::sqrt(bias); |
1138 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1139 | 0 | const float* BUTTERAUGLI_RESTRICT row_in = xyb.Row(y); |
1140 | 0 | float* BUTTERAUGLI_RESTRICT row_out = out->Row(y); |
1141 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1142 | | // kBias makes sqrt behave more linearly. |
1143 | 0 | row_out[x] = std::sqrt(mul * std::abs(row_in[x]) + bias) - sqrt_bias; |
1144 | 0 | } |
1145 | 0 | } |
1146 | 0 | } |
1147 | | |
1148 | | // std::log(80.0) / std::log(255.0); |
1149 | | constexpr float kIntensityTargetNormalizationHack = 0.79079917404f; |
1150 | | static const float kInternalGoodQualityThreshold = |
1151 | | 17.83f * kIntensityTargetNormalizationHack; |
1152 | | static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold; |
1153 | | |
1154 | 0 | void StoreMin3(const float v, float& min0, float& min1, float& min2) { |
1155 | 0 | if (v < min2) { |
1156 | 0 | if (v < min0) { |
1157 | 0 | min2 = min1; |
1158 | 0 | min1 = min0; |
1159 | 0 | min0 = v; |
1160 | 0 | } else if (v < min1) { |
1161 | 0 | min2 = min1; |
1162 | 0 | min1 = v; |
1163 | 0 | } else { |
1164 | 0 | min2 = v; |
1165 | 0 | } |
1166 | 0 | } |
1167 | 0 | } |
1168 | | |
1169 | | // Look for smooth areas near the area of degradation. |
1170 | | // If the areas area generally smooth, don't do masking. |
1171 | 0 | void FuzzyErosion(const ImageF& from, ImageF* to) { |
1172 | 0 | const size_t xsize = from.xsize(); |
1173 | 0 | const size_t ysize = from.ysize(); |
1174 | 0 | static const int kStep = 3; |
1175 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1176 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1177 | 0 | float min0 = from.Row(y)[x]; |
1178 | 0 | float min1 = 2 * min0; |
1179 | 0 | float min2 = min1; |
1180 | 0 | if (x >= kStep) { |
1181 | 0 | float v = from.Row(y)[x - kStep]; |
1182 | 0 | StoreMin3(v, min0, min1, min2); |
1183 | 0 | if (y >= kStep) { |
1184 | 0 | float v = from.Row(y - kStep)[x - kStep]; |
1185 | 0 | StoreMin3(v, min0, min1, min2); |
1186 | 0 | } |
1187 | 0 | if (y < ysize - kStep) { |
1188 | 0 | float v = from.Row(y + kStep)[x - kStep]; |
1189 | 0 | StoreMin3(v, min0, min1, min2); |
1190 | 0 | } |
1191 | 0 | } |
1192 | 0 | if (x < xsize - kStep) { |
1193 | 0 | float v = from.Row(y)[x + kStep]; |
1194 | 0 | StoreMin3(v, min0, min1, min2); |
1195 | 0 | if (y >= kStep) { |
1196 | 0 | float v = from.Row(y - kStep)[x + kStep]; |
1197 | 0 | StoreMin3(v, min0, min1, min2); |
1198 | 0 | } |
1199 | 0 | if (y < ysize - kStep) { |
1200 | 0 | float v = from.Row(y + kStep)[x + kStep]; |
1201 | 0 | StoreMin3(v, min0, min1, min2); |
1202 | 0 | } |
1203 | 0 | } |
1204 | 0 | if (y >= kStep) { |
1205 | 0 | float v = from.Row(y - kStep)[x]; |
1206 | 0 | StoreMin3(v, min0, min1, min2); |
1207 | 0 | } |
1208 | 0 | if (y < ysize - kStep) { |
1209 | 0 | float v = from.Row(y + kStep)[x]; |
1210 | 0 | StoreMin3(v, min0, min1, min2); |
1211 | 0 | } |
1212 | 0 | to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2); |
1213 | 0 | } |
1214 | 0 | } |
1215 | 0 | } |
1216 | | |
1217 | | // Compute values of local frequency and dc masking based on the activity |
1218 | | // in the two images. img_diff_ac may be null. |
1219 | | Status Mask(const ImageF& mask0, const ImageF& mask1, |
1220 | | const ButteraugliParams& params, BlurTemp* blur_temp, |
1221 | | ImageF* BUTTERAUGLI_RESTRICT mask, |
1222 | 0 | ImageF* BUTTERAUGLI_RESTRICT diff_ac) { |
1223 | 0 | const size_t xsize = mask0.xsize(); |
1224 | 0 | const size_t ysize = mask0.ysize(); |
1225 | 0 | JxlMemoryManager* memory_manager = mask0.memory_manager(); |
1226 | 0 | JXL_ASSIGN_OR_RETURN(*mask, ImageF::Create(memory_manager, xsize, ysize)); |
1227 | 0 | static const float kMul = 6.19424080439; |
1228 | 0 | static const float kBias = 12.61050594197; |
1229 | 0 | static const float kRadius = 2.7; |
1230 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diff0, |
1231 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1232 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diff1, |
1233 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1234 | 0 | JXL_ASSIGN_OR_RETURN(ImageF blurred0, |
1235 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1236 | 0 | JXL_ASSIGN_OR_RETURN(ImageF blurred1, |
1237 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1238 | 0 | DiffPrecompute(mask0, kMul, kBias, &diff0); |
1239 | 0 | DiffPrecompute(mask1, kMul, kBias, &diff1); |
1240 | 0 | JXL_RETURN_IF_ERROR(Blur(diff0, kRadius, params, blur_temp, &blurred0)); |
1241 | 0 | FuzzyErosion(blurred0, &diff0); |
1242 | 0 | JXL_RETURN_IF_ERROR(Blur(diff1, kRadius, params, blur_temp, &blurred1)); |
1243 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1244 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1245 | 0 | mask->Row(y)[x] = diff0.Row(y)[x]; |
1246 | 0 | if (diff_ac != nullptr) { |
1247 | 0 | static const float kMaskToErrorMul = 10.0; |
1248 | 0 | float diff = blurred0.Row(y)[x] - blurred1.Row(y)[x]; |
1249 | 0 | diff_ac->Row(y)[x] += kMaskToErrorMul * diff * diff; |
1250 | 0 | } |
1251 | 0 | } |
1252 | 0 | } |
1253 | 0 | return true; |
1254 | 0 | } |
1255 | | |
1256 | | // `diff_ac` may be null. |
1257 | | Status MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1, |
1258 | | const size_t xsize, const size_t ysize, |
1259 | | const ButteraugliParams& params, BlurTemp* blur_temp, |
1260 | | ImageF* BUTTERAUGLI_RESTRICT mask, |
1261 | 0 | ImageF* BUTTERAUGLI_RESTRICT diff_ac) { |
1262 | 0 | JxlMemoryManager* memory_manager = pi0.hf[0].memory_manager(); |
1263 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask0, |
1264 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1265 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask1, |
1266 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1267 | 0 | CombineChannelsForMasking(&pi0.hf[0], &pi0.uhf[0], &mask0); |
1268 | 0 | CombineChannelsForMasking(&pi1.hf[0], &pi1.uhf[0], &mask1); |
1269 | 0 | JXL_RETURN_IF_ERROR(Mask(mask0, mask1, params, blur_temp, mask, diff_ac)); |
1270 | 0 | return true; |
1271 | 0 | } |
1272 | | |
1273 | 0 | double MaskY(double delta) { |
1274 | 0 | static const double offset = 0.829591754942; |
1275 | 0 | static const double scaler = 0.451936922203; |
1276 | 0 | static const double mul = 2.5485944793; |
1277 | 0 | const double c = mul / ((scaler * delta) + offset); |
1278 | 0 | const double retval = kGlobalScale * (1.0 + c); |
1279 | 0 | return retval * retval; |
1280 | 0 | } |
1281 | | |
1282 | 0 | double MaskDcY(double delta) { |
1283 | 0 | static const double offset = 0.20025578522; |
1284 | 0 | static const double scaler = 3.87449418804; |
1285 | 0 | static const double mul = 0.505054525019; |
1286 | 0 | const double c = mul / ((scaler * delta) + offset); |
1287 | 0 | const double retval = kGlobalScale * (1.0 + c); |
1288 | 0 | return retval * retval; |
1289 | 0 | } |
1290 | | |
1291 | 0 | inline float MaskColor(const float color[3], const float mask) { |
1292 | 0 | return color[0] * mask + color[1] * mask + color[2] * mask; |
1293 | 0 | } |
1294 | | |
1295 | | // Diffmap := sqrt of sum{diff images by multiplied by X and Y/B masks} |
1296 | | Status CombineChannelsToDiffmap(const ImageF& mask, |
1297 | | const Image3F& block_diff_dc, |
1298 | | const Image3F& block_diff_ac, float xmul, |
1299 | 0 | ImageF* result) { |
1300 | 0 | JXL_ENSURE(SameSize(mask, *result)); |
1301 | 0 | size_t xsize = mask.xsize(); |
1302 | 0 | size_t ysize = mask.ysize(); |
1303 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1304 | 0 | float* BUTTERAUGLI_RESTRICT row_out = result->Row(y); |
1305 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1306 | 0 | float val = mask.Row(y)[x]; |
1307 | 0 | float maskval = MaskY(val); |
1308 | 0 | float dc_maskval = MaskDcY(val); |
1309 | 0 | float diff_dc[3]; |
1310 | 0 | float diff_ac[3]; |
1311 | 0 | for (int i = 0; i < 3; ++i) { |
1312 | 0 | diff_dc[i] = block_diff_dc.PlaneRow(i, y)[x]; |
1313 | 0 | diff_ac[i] = block_diff_ac.PlaneRow(i, y)[x]; |
1314 | 0 | } |
1315 | 0 | diff_ac[0] *= xmul; |
1316 | 0 | diff_dc[0] *= xmul; |
1317 | 0 | row_out[x] = std::sqrt(MaskColor(diff_dc, dc_maskval) + |
1318 | 0 | MaskColor(diff_ac, maskval)); |
1319 | 0 | } |
1320 | 0 | } |
1321 | 0 | return true; |
1322 | 0 | } |
1323 | | |
1324 | | // Adds weighted L2 difference between i0 and i1 to diffmap. |
1325 | | static void L2Diff(const ImageF& i0, const ImageF& i1, const float w, |
1326 | 0 | ImageF* BUTTERAUGLI_RESTRICT diffmap) { |
1327 | 0 | if (w == 0) return; |
1328 | | |
1329 | 0 | const HWY_FULL(float) d; |
1330 | 0 | const auto weight = Set(d, w); |
1331 | |
|
1332 | 0 | for (size_t y = 0; y < i0.ysize(); ++y) { |
1333 | 0 | const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); |
1334 | 0 | const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); |
1335 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); |
1336 | |
|
1337 | 0 | for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { |
1338 | 0 | const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); |
1339 | 0 | const auto diff2 = Mul(diff, diff); |
1340 | 0 | const auto prev = Load(d, row_diff + x); |
1341 | 0 | Store(MulAdd(diff2, weight, prev), d, row_diff + x); |
1342 | 0 | } |
1343 | 0 | } |
1344 | 0 | } |
1345 | | |
1346 | | // Initializes diffmap to the weighted L2 difference between i0 and i1. |
1347 | | static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w, |
1348 | 0 | ImageF* BUTTERAUGLI_RESTRICT diffmap) { |
1349 | 0 | if (w == 0) return; |
1350 | | |
1351 | 0 | const HWY_FULL(float) d; |
1352 | 0 | const auto weight = Set(d, w); |
1353 | |
|
1354 | 0 | for (size_t y = 0; y < i0.ysize(); ++y) { |
1355 | 0 | const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); |
1356 | 0 | const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); |
1357 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); |
1358 | |
|
1359 | 0 | for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { |
1360 | 0 | const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); |
1361 | 0 | const auto diff2 = Mul(diff, diff); |
1362 | 0 | Store(Mul(diff2, weight), d, row_diff + x); |
1363 | 0 | } |
1364 | 0 | } |
1365 | 0 | } |
1366 | | |
1367 | | // i0 is the original image. |
1368 | | // i1 is the deformed copy. |
1369 | | static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1, |
1370 | | float w_0lt1, |
1371 | 0 | ImageF* BUTTERAUGLI_RESTRICT diffmap) { |
1372 | 0 | if (w_0gt1 == 0 && w_0lt1 == 0) { |
1373 | 0 | return; |
1374 | 0 | } |
1375 | | |
1376 | 0 | const HWY_FULL(float) d; |
1377 | 0 | const auto vw_0gt1 = Set(d, w_0gt1 * 0.8); |
1378 | 0 | const auto vw_0lt1 = Set(d, w_0lt1 * 0.8); |
1379 | |
|
1380 | 0 | for (size_t y = 0; y < i0.ysize(); ++y) { |
1381 | 0 | const float* BUTTERAUGLI_RESTRICT row0 = i0.Row(y); |
1382 | 0 | const float* BUTTERAUGLI_RESTRICT row1 = i1.Row(y); |
1383 | 0 | float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); |
1384 | |
|
1385 | 0 | for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { |
1386 | 0 | const auto val0 = Load(d, row0 + x); |
1387 | 0 | const auto val1 = Load(d, row1 + x); |
1388 | | |
1389 | | // Primary symmetric quadratic objective. |
1390 | 0 | const auto diff = Sub(val0, val1); |
1391 | 0 | auto total = MulAdd(Mul(diff, diff), vw_0gt1, Load(d, row_diff + x)); |
1392 | | |
1393 | | // Secondary half-open quadratic objectives. |
1394 | 0 | const auto fabs0 = Abs(val0); |
1395 | 0 | const auto too_small = Mul(Set(d, 0.4), fabs0); |
1396 | 0 | const auto too_big = fabs0; |
1397 | |
|
1398 | 0 | const auto if_neg = IfThenElse( |
1399 | 0 | Gt(val1, Neg(too_small)), Add(val1, too_small), |
1400 | 0 | IfThenElseZero(Lt(val1, Neg(too_big)), Sub(Neg(val1), too_big))); |
1401 | 0 | const auto if_pos = |
1402 | 0 | IfThenElse(Lt(val1, too_small), Sub(too_small, val1), |
1403 | 0 | IfThenElseZero(Gt(val1, too_big), Sub(val1, too_big))); |
1404 | 0 | const auto v = IfThenElse(Lt(val0, Zero(d)), if_neg, if_pos); |
1405 | 0 | total = MulAdd(vw_0lt1, Mul(v, v), total); |
1406 | 0 | Store(total, d, row_diff + x); |
1407 | 0 | } |
1408 | 0 | } |
1409 | 0 | } |
1410 | | |
1411 | | // A simple HDR compatible gamma function. |
1412 | | template <class DF, class V> |
1413 | 0 | V Gamma(const DF df, V v) { |
1414 | | // ln(2) constant folded in because we want std::log but have FastLog2f. |
1415 | 0 | const auto kRetMul = Set(df, 19.245013259874995f * 0.693147180559945f); |
1416 | 0 | const auto kRetAdd = Set(df, -23.16046239805755); |
1417 | | // This should happen rarely, but may lead to a NaN in log, which is |
1418 | | // undesirable. Since negative photons don't exist we solve the NaNs by |
1419 | | // clamping here. |
1420 | 0 | v = ZeroIfNegative(v); |
1421 | |
|
1422 | 0 | const auto biased = Add(v, Set(df, 9.9710635769299145)); |
1423 | 0 | const auto log = FastLog2f(df, biased); |
1424 | | // We could fold this into a custom Log2 polynomial, but there would be |
1425 | | // relatively little gain. |
1426 | 0 | return MulAdd(kRetMul, log, kRetAdd); |
1427 | 0 | } |
1428 | | |
1429 | | template <bool Clamp, class DF, class V> |
1430 | | BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1, |
1431 | | const V& in2, V* JXL_RESTRICT out0, |
1432 | | V* JXL_RESTRICT out1, |
1433 | 0 | V* JXL_RESTRICT out2) { |
1434 | | // https://en.wikipedia.org/wiki/Photopsin absorbance modeling. |
1435 | 0 | static const double mixi0 = 0.29956550340058319; |
1436 | 0 | static const double mixi1 = 0.63373087833825936; |
1437 | 0 | static const double mixi2 = 0.077705617820981968; |
1438 | 0 | static const double mixi3 = 1.7557483643287353; |
1439 | 0 | static const double mixi4 = 0.22158691104574774; |
1440 | 0 | static const double mixi5 = 0.69391388044116142; |
1441 | 0 | static const double mixi6 = 0.0987313588422; |
1442 | 0 | static const double mixi7 = 1.7557483643287353; |
1443 | 0 | static const double mixi8 = 0.02; |
1444 | 0 | static const double mixi9 = 0.02; |
1445 | 0 | static const double mixi10 = 0.20480129041026129; |
1446 | 0 | static const double mixi11 = 12.226454707163354; |
1447 | |
|
1448 | 0 | const V mix0 = Set(df, mixi0); |
1449 | 0 | const V mix1 = Set(df, mixi1); |
1450 | 0 | const V mix2 = Set(df, mixi2); |
1451 | 0 | const V mix3 = Set(df, mixi3); |
1452 | 0 | const V mix4 = Set(df, mixi4); |
1453 | 0 | const V mix5 = Set(df, mixi5); |
1454 | 0 | const V mix6 = Set(df, mixi6); |
1455 | 0 | const V mix7 = Set(df, mixi7); |
1456 | 0 | const V mix8 = Set(df, mixi8); |
1457 | 0 | const V mix9 = Set(df, mixi9); |
1458 | 0 | const V mix10 = Set(df, mixi10); |
1459 | 0 | const V mix11 = Set(df, mixi11); |
1460 | |
|
1461 | 0 | *out0 = MulAdd(mix0, in0, MulAdd(mix1, in1, MulAdd(mix2, in2, mix3))); |
1462 | 0 | *out1 = MulAdd(mix4, in0, MulAdd(mix5, in1, MulAdd(mix6, in2, mix7))); |
1463 | 0 | *out2 = MulAdd(mix8, in0, MulAdd(mix9, in1, MulAdd(mix10, in2, mix11))); |
1464 | |
|
1465 | 0 | if (Clamp) { |
1466 | 0 | *out0 = Max(*out0, mix3); |
1467 | 0 | *out1 = Max(*out1, mix7); |
1468 | 0 | *out2 = Max(*out2, mix11); |
1469 | 0 | } |
1470 | 0 | } Unexecuted instantiation: void jxl::N_SCALAR::OpsinAbsorbance<true, hwy::N_SCALAR::Simd<float, 1ul, 0>, hwy::N_SCALAR::Vec1<float> >(hwy::N_SCALAR::Simd<float, 1ul, 0>, hwy::N_SCALAR::Vec1<float> const&, hwy::N_SCALAR::Vec1<float> const&, hwy::N_SCALAR::Vec1<float> const&, hwy::N_SCALAR::Vec1<float>*, hwy::N_SCALAR::Vec1<float>*, hwy::N_SCALAR::Vec1<float>*) Unexecuted instantiation: void jxl::N_SCALAR::OpsinAbsorbance<false, hwy::N_SCALAR::Simd<float, 1ul, 0>, hwy::N_SCALAR::Vec1<float> >(hwy::N_SCALAR::Simd<float, 1ul, 0>, hwy::N_SCALAR::Vec1<float> const&, hwy::N_SCALAR::Vec1<float> const&, hwy::N_SCALAR::Vec1<float> const&, hwy::N_SCALAR::Vec1<float>*, hwy::N_SCALAR::Vec1<float>*, hwy::N_SCALAR::Vec1<float>*) |
1471 | | |
1472 | | // `blurred` is a temporary image used inside this function and not returned. |
1473 | | Status OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params, |
1474 | 0 | Image3F* blurred, BlurTemp* blur_temp, Image3F* xyb) { |
1475 | 0 | JXL_ENSURE(blurred != nullptr); |
1476 | 0 | const double kSigma = 1.2; |
1477 | 0 | JXL_RETURN_IF_ERROR( |
1478 | 0 | Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0))); |
1479 | 0 | JXL_RETURN_IF_ERROR( |
1480 | 0 | Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1))); |
1481 | 0 | JXL_RETURN_IF_ERROR( |
1482 | 0 | Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2))); |
1483 | 0 | const HWY_FULL(float) df; |
1484 | 0 | const auto intensity_target_multiplier = Set(df, params.intensity_target); |
1485 | 0 | for (size_t y = 0; y < rgb.ysize(); ++y) { |
1486 | 0 | const float* row_r = rgb.ConstPlaneRow(0, y); |
1487 | 0 | const float* row_g = rgb.ConstPlaneRow(1, y); |
1488 | 0 | const float* row_b = rgb.ConstPlaneRow(2, y); |
1489 | 0 | const float* row_blurred_r = blurred->ConstPlaneRow(0, y); |
1490 | 0 | const float* row_blurred_g = blurred->ConstPlaneRow(1, y); |
1491 | 0 | const float* row_blurred_b = blurred->ConstPlaneRow(2, y); |
1492 | 0 | float* row_out_x = xyb->PlaneRow(0, y); |
1493 | 0 | float* row_out_y = xyb->PlaneRow(1, y); |
1494 | 0 | float* row_out_b = xyb->PlaneRow(2, y); |
1495 | 0 | const auto min = Set(df, 1e-4f); |
1496 | 0 | for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) { |
1497 | 0 | auto sensitivity0 = Undefined(df); |
1498 | 0 | auto sensitivity1 = Undefined(df); |
1499 | 0 | auto sensitivity2 = Undefined(df); |
1500 | 0 | { |
1501 | | // Calculate sensitivity based on the smoothed image gamma derivative. |
1502 | 0 | auto pre_mixed0 = Undefined(df); |
1503 | 0 | auto pre_mixed1 = Undefined(df); |
1504 | 0 | auto pre_mixed2 = Undefined(df); |
1505 | 0 | OpsinAbsorbance<true>( |
1506 | 0 | df, Mul(Load(df, row_blurred_r + x), intensity_target_multiplier), |
1507 | 0 | Mul(Load(df, row_blurred_g + x), intensity_target_multiplier), |
1508 | 0 | Mul(Load(df, row_blurred_b + x), intensity_target_multiplier), |
1509 | 0 | &pre_mixed0, &pre_mixed1, &pre_mixed2); |
1510 | 0 | pre_mixed0 = Max(pre_mixed0, min); |
1511 | 0 | pre_mixed1 = Max(pre_mixed1, min); |
1512 | 0 | pre_mixed2 = Max(pre_mixed2, min); |
1513 | 0 | sensitivity0 = Div(Gamma(df, pre_mixed0), pre_mixed0); |
1514 | 0 | sensitivity1 = Div(Gamma(df, pre_mixed1), pre_mixed1); |
1515 | 0 | sensitivity2 = Div(Gamma(df, pre_mixed2), pre_mixed2); |
1516 | 0 | sensitivity0 = Max(sensitivity0, min); |
1517 | 0 | sensitivity1 = Max(sensitivity1, min); |
1518 | 0 | sensitivity2 = Max(sensitivity2, min); |
1519 | 0 | } |
1520 | 0 | auto cur_mixed0 = Undefined(df); |
1521 | 0 | auto cur_mixed1 = Undefined(df); |
1522 | 0 | auto cur_mixed2 = Undefined(df); |
1523 | 0 | OpsinAbsorbance<false>( |
1524 | 0 | df, Mul(Load(df, row_r + x), intensity_target_multiplier), |
1525 | 0 | Mul(Load(df, row_g + x), intensity_target_multiplier), |
1526 | 0 | Mul(Load(df, row_b + x), intensity_target_multiplier), &cur_mixed0, |
1527 | 0 | &cur_mixed1, &cur_mixed2); |
1528 | 0 | cur_mixed0 = Mul(cur_mixed0, sensitivity0); |
1529 | 0 | cur_mixed1 = Mul(cur_mixed1, sensitivity1); |
1530 | 0 | cur_mixed2 = Mul(cur_mixed2, sensitivity2); |
1531 | | // This is a kludge. The negative values should be zeroed away before |
1532 | | // blurring. Ideally there would be no negative values in the first place. |
1533 | 0 | const auto min01 = Set(df, 1.7557483643287353f); |
1534 | 0 | const auto min2 = Set(df, 12.226454707163354f); |
1535 | 0 | cur_mixed0 = Max(cur_mixed0, min01); |
1536 | 0 | cur_mixed1 = Max(cur_mixed1, min01); |
1537 | 0 | cur_mixed2 = Max(cur_mixed2, min2); |
1538 | |
|
1539 | 0 | Store(Sub(cur_mixed0, cur_mixed1), df, row_out_x + x); |
1540 | 0 | Store(Add(cur_mixed0, cur_mixed1), df, row_out_y + x); |
1541 | 0 | Store(cur_mixed2, df, row_out_b + x); |
1542 | 0 | } |
1543 | 0 | } |
1544 | 0 | return true; |
1545 | 0 | } |
1546 | | |
1547 | | Status ButteraugliDiffmapInPlace(Image3F& image0, Image3F& image1, |
1548 | | const ButteraugliParams& params, |
1549 | 0 | ImageF& diffmap) { |
1550 | | // image0 and image1 are in linear sRGB color space |
1551 | 0 | const size_t xsize = image0.xsize(); |
1552 | 0 | const size_t ysize = image0.ysize(); |
1553 | 0 | JxlMemoryManager* memory_manager = image0.memory_manager(); |
1554 | 0 | BlurTemp blur_temp; |
1555 | 0 | { |
1556 | | // Convert image0 and image1 to XYB in-place |
1557 | 0 | JXL_ASSIGN_OR_RETURN(Image3F temp, |
1558 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1559 | 0 | JXL_RETURN_IF_ERROR( |
1560 | 0 | OpsinDynamicsImage(image0, params, &temp, &blur_temp, &image0)); |
1561 | 0 | JXL_RETURN_IF_ERROR( |
1562 | 0 | OpsinDynamicsImage(image1, params, &temp, &blur_temp, &image1)); |
1563 | 0 | } |
1564 | | // image0 and image1 are in XYB color space |
1565 | 0 | JXL_ASSIGN_OR_RETURN(ImageF block_diff_dc, |
1566 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1567 | 0 | ZeroFillImage(&block_diff_dc); |
1568 | 0 | { |
1569 | | // separate out LF components from image0 and image1 and compute the dc |
1570 | | // diff image from them |
1571 | 0 | JXL_ASSIGN_OR_RETURN(Image3F lf0, |
1572 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1573 | 0 | JXL_ASSIGN_OR_RETURN(Image3F lf1, |
1574 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1575 | 0 | JXL_RETURN_IF_ERROR( |
1576 | 0 | SeparateLFAndMF(params, image0, &lf0, &image0, &blur_temp)); |
1577 | 0 | JXL_RETURN_IF_ERROR( |
1578 | 0 | SeparateLFAndMF(params, image1, &lf1, &image1, &blur_temp)); |
1579 | 0 | for (size_t c = 0; c < 3; ++c) { |
1580 | 0 | L2Diff(lf0.Plane(c), lf1.Plane(c), wmul[6 + c], &block_diff_dc); |
1581 | 0 | } |
1582 | 0 | } |
1583 | | // image0 and image1 are MF residuals (before blurring) in XYB color space |
1584 | 0 | ImageF hf0[2]; |
1585 | 0 | ImageF hf1[2]; |
1586 | 0 | JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image0, &hf0[0], &blur_temp)); |
1587 | 0 | JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image1, &hf1[0], &blur_temp)); |
1588 | | // image0 and image1 are MF-images in XYB color space |
1589 | | |
1590 | 0 | JXL_ASSIGN_OR_RETURN(ImageF block_diff_ac, |
1591 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1592 | 0 | ZeroFillImage(&block_diff_ac); |
1593 | | // start accumulating ac diff image from MF images |
1594 | 0 | { |
1595 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diffs, |
1596 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1597 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(image0.Plane(1), image1.Plane(1), |
1598 | 0 | wMfMalta, wMfMalta, norm1Mf, &diffs, |
1599 | 0 | &block_diff_ac)); |
1600 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(image0.Plane(0), image1.Plane(0), |
1601 | 0 | wMfMaltaX, wMfMaltaX, norm1MfX, &diffs, |
1602 | 0 | &block_diff_ac)); |
1603 | 0 | } |
1604 | 0 | for (size_t c = 0; c < 3; ++c) { |
1605 | 0 | L2Diff(image0.Plane(c), image1.Plane(c), wmul[3 + c], &block_diff_ac); |
1606 | 0 | } |
1607 | | // we will not need the MF-images and more, so we deallocate them to reduce |
1608 | | // peak memory usage |
1609 | 0 | image0 = Image3F(); |
1610 | 0 | image1 = Image3F(); |
1611 | |
|
1612 | 0 | ImageF uhf0[2]; |
1613 | 0 | ImageF uhf1[2]; |
1614 | 0 | JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf0[0], &uhf0[0], &blur_temp)); |
1615 | 0 | JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf1[0], &uhf1[0], &blur_temp)); |
1616 | | |
1617 | | // continue accumulating ac diff image from HF and UHF images |
1618 | 0 | const float hf_asymmetry = params.hf_asymmetry; |
1619 | 0 | { |
1620 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diffs, |
1621 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1622 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMap(uhf0[1], uhf1[1], wUhfMalta * hf_asymmetry, |
1623 | 0 | wUhfMalta / hf_asymmetry, norm1Uhf, &diffs, |
1624 | 0 | &block_diff_ac)); |
1625 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMap( |
1626 | 0 | uhf0[0], uhf1[0], wUhfMaltaX * hf_asymmetry, wUhfMaltaX / hf_asymmetry, |
1627 | 0 | norm1UhfX, &diffs, &block_diff_ac)); |
1628 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF( |
1629 | 0 | hf0[1], hf1[1], wHfMalta * std::sqrt(hf_asymmetry), |
1630 | 0 | wHfMalta / std::sqrt(hf_asymmetry), norm1Hf, &diffs, &block_diff_ac)); |
1631 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF( |
1632 | 0 | hf0[0], hf1[0], wHfMaltaX * std::sqrt(hf_asymmetry), |
1633 | 0 | wHfMaltaX / std::sqrt(hf_asymmetry), norm1HfX, &diffs, &block_diff_ac)); |
1634 | 0 | } |
1635 | 0 | for (size_t c = 0; c < 2; ++c) { |
1636 | 0 | L2DiffAsymmetric(hf0[c], hf1[c], wmul[c] * hf_asymmetry, |
1637 | 0 | wmul[c] / hf_asymmetry, &block_diff_ac); |
1638 | 0 | } |
1639 | | |
1640 | | // compute mask image from HF and UHF X and Y images |
1641 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask, |
1642 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1643 | 0 | { |
1644 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask0, |
1645 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1646 | 0 | JXL_ASSIGN_OR_RETURN(ImageF mask1, |
1647 | 0 | ImageF::Create(memory_manager, xsize, ysize)); |
1648 | 0 | CombineChannelsForMasking(&hf0[0], &uhf0[0], &mask0); |
1649 | 0 | CombineChannelsForMasking(&hf1[0], &uhf1[0], &mask1); |
1650 | 0 | DeallocateHFAndUHF(&hf1[0], &uhf1[0]); |
1651 | 0 | DeallocateHFAndUHF(&hf0[0], &uhf0[0]); |
1652 | 0 | JXL_RETURN_IF_ERROR( |
1653 | 0 | Mask(mask0, mask1, params, &blur_temp, &mask, &block_diff_ac)); |
1654 | 0 | } |
1655 | | |
1656 | | // compute final diffmap from mask image and ac and dc diff images |
1657 | 0 | JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(memory_manager, xsize, ysize)); |
1658 | 0 | for (size_t y = 0; y < ysize; ++y) { |
1659 | 0 | const float* row_dc = block_diff_dc.Row(y); |
1660 | 0 | const float* row_ac = block_diff_ac.Row(y); |
1661 | 0 | float* row_out = diffmap.Row(y); |
1662 | 0 | for (size_t x = 0; x < xsize; ++x) { |
1663 | 0 | const float val = mask.Row(y)[x]; |
1664 | 0 | row_out[x] = sqrt(row_dc[x] * MaskDcY(val) + row_ac[x] * MaskY(val)); |
1665 | 0 | } |
1666 | 0 | } |
1667 | 0 | return true; |
1668 | 0 | } |
1669 | | |
1670 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
1671 | | } // namespace HWY_NAMESPACE |
1672 | | } // namespace jxl |
1673 | | HWY_AFTER_NAMESPACE(); |
1674 | | |
1675 | | #if HWY_ONCE |
1676 | | namespace jxl { |
1677 | | |
1678 | | HWY_EXPORT(SeparateFrequencies); // Local function. |
1679 | | HWY_EXPORT(MaskPsychoImage); // Local function. |
1680 | | HWY_EXPORT(L2DiffAsymmetric); // Local function. |
1681 | | HWY_EXPORT(L2Diff); // Local function. |
1682 | | HWY_EXPORT(SetL2Diff); // Local function. |
1683 | | HWY_EXPORT(CombineChannelsToDiffmap); // Local function. |
1684 | | HWY_EXPORT(MaltaDiffMap); // Local function. |
1685 | | HWY_EXPORT(MaltaDiffMapLF); // Local function. |
1686 | | HWY_EXPORT(OpsinDynamicsImage); // Local function. |
1687 | | HWY_EXPORT(ButteraugliDiffmapInPlace); // Local function. |
1688 | | |
1689 | | #if BUTTERAUGLI_ENABLE_CHECKS |
1690 | | |
1691 | | static inline bool IsNan(const float x) { |
1692 | | uint32_t bits; |
1693 | | memcpy(&bits, &x, sizeof(bits)); |
1694 | | const uint32_t bitmask_exp = 0x7F800000; |
1695 | | return (bits & bitmask_exp) == bitmask_exp && (bits & 0x7FFFFF); |
1696 | | } |
1697 | | |
1698 | | static inline bool IsNan(const double x) { |
1699 | | uint64_t bits; |
1700 | | memcpy(&bits, &x, sizeof(bits)); |
1701 | | return (0x7ff0000000000001ULL <= bits && bits <= 0x7fffffffffffffffULL) || |
1702 | | (0xfff0000000000001ULL <= bits && bits <= 0xffffffffffffffffULL); |
1703 | | } |
1704 | | |
1705 | | static inline void CheckImage(const ImageF& image, const char* name) { |
1706 | | for (size_t y = 0; y < image.ysize(); ++y) { |
1707 | | const float* BUTTERAUGLI_RESTRICT row = image.Row(y); |
1708 | | for (size_t x = 0; x < image.xsize(); ++x) { |
1709 | | if (IsNan(row[x])) { |
1710 | | printf("NAN: Image %s @ %" PRIuS ",%" PRIuS " (of %" PRIuS ",%" PRIuS |
1711 | | ")\n", |
1712 | | name, x, y, image.xsize(), image.ysize()); |
1713 | | exit(1); |
1714 | | } |
1715 | | } |
1716 | | } |
1717 | | } |
1718 | | |
1719 | | #define CHECK_NAN(x, str) \ |
1720 | | do { \ |
1721 | | if (IsNan(x)) { \ |
1722 | | printf("%d: %s\n", __LINE__, str); \ |
1723 | | abort(); \ |
1724 | | } \ |
1725 | | } while (0) |
1726 | | |
1727 | | #define CHECK_IMAGE(image, name) CheckImage(image, name) |
1728 | | |
1729 | | #else // BUTTERAUGLI_ENABLE_CHECKS |
1730 | | |
1731 | | #define CHECK_NAN(x, str) |
1732 | | #define CHECK_IMAGE(image, name) |
1733 | | |
1734 | | #endif // BUTTERAUGLI_ENABLE_CHECKS |
1735 | | |
1736 | | // Calculate a 2x2 subsampled image for purposes of recursive butteraugli at |
1737 | | // multiresolution. |
1738 | 0 | static StatusOr<Image3F> SubSample2x(const Image3F& in) { |
1739 | 0 | size_t xs = (in.xsize() + 1) / 2; |
1740 | 0 | size_t ys = (in.ysize() + 1) / 2; |
1741 | 0 | JxlMemoryManager* memory_manager = in.memory_manager(); |
1742 | 0 | JXL_ASSIGN_OR_RETURN(Image3F retval, Image3F::Create(memory_manager, xs, ys)); |
1743 | 0 | for (size_t c = 0; c < 3; ++c) { |
1744 | 0 | for (size_t y = 0; y < ys; ++y) { |
1745 | 0 | for (size_t x = 0; x < xs; ++x) { |
1746 | 0 | retval.PlaneRow(c, y)[x] = 0; |
1747 | 0 | } |
1748 | 0 | } |
1749 | 0 | } |
1750 | 0 | for (size_t c = 0; c < 3; ++c) { |
1751 | 0 | for (size_t y = 0; y < in.ysize(); ++y) { |
1752 | 0 | for (size_t x = 0; x < in.xsize(); ++x) { |
1753 | 0 | retval.PlaneRow(c, y / 2)[x / 2] += 0.25f * in.PlaneRow(c, y)[x]; |
1754 | 0 | } |
1755 | 0 | } |
1756 | 0 | if ((in.xsize() & 1) != 0) { |
1757 | 0 | for (size_t y = 0; y < retval.ysize(); ++y) { |
1758 | 0 | size_t last_column = retval.xsize() - 1; |
1759 | 0 | retval.PlaneRow(c, y)[last_column] *= 2.0f; |
1760 | 0 | } |
1761 | 0 | } |
1762 | 0 | if ((in.ysize() & 1) != 0) { |
1763 | 0 | for (size_t x = 0; x < retval.xsize(); ++x) { |
1764 | 0 | size_t last_row = retval.ysize() - 1; |
1765 | 0 | retval.PlaneRow(c, last_row)[x] *= 2.0f; |
1766 | 0 | } |
1767 | 0 | } |
1768 | 0 | } |
1769 | 0 | return retval; |
1770 | 0 | } |
1771 | | |
1772 | | // Supersample src by 2x and add it to dest. |
1773 | 0 | static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) { |
1774 | 0 | for (size_t y = 0; y < dest.ysize(); ++y) { |
1775 | 0 | for (size_t x = 0; x < dest.xsize(); ++x) { |
1776 | | // There will be less errors from the more averaged images. |
1777 | | // We take it into account to some extent using a scaler. |
1778 | 0 | static const double kHeuristicMixingValue = 0.3; |
1779 | 0 | dest.Row(y)[x] *= 1.0 - kHeuristicMixingValue * w; |
1780 | 0 | dest.Row(y)[x] += w * src.Row(y / 2)[x / 2]; |
1781 | 0 | } |
1782 | 0 | } |
1783 | 0 | } |
1784 | | |
1785 | 0 | Image3F* ButteraugliComparator::Temp() const { |
1786 | 0 | bool was_in_use = temp_in_use_.test_and_set(std::memory_order_acq_rel); |
1787 | 0 | if (was_in_use) return nullptr; |
1788 | 0 | return &temp_; |
1789 | 0 | } |
1790 | | |
1791 | 0 | void ButteraugliComparator::ReleaseTemp() const { temp_in_use_.clear(); } |
1792 | | |
1793 | | ButteraugliComparator::ButteraugliComparator(size_t xsize, size_t ysize, |
1794 | | const ButteraugliParams& params) |
1795 | 0 | : xsize_(xsize), ysize_(ysize), params_(params) {} |
1796 | | |
1797 | | StatusOr<std::unique_ptr<ButteraugliComparator>> ButteraugliComparator::Make( |
1798 | 0 | const Image3F& rgb0, const ButteraugliParams& params) { |
1799 | 0 | size_t xsize = rgb0.xsize(); |
1800 | 0 | size_t ysize = rgb0.ysize(); |
1801 | 0 | JxlMemoryManager* memory_manager = rgb0.memory_manager(); |
1802 | 0 | std::unique_ptr<ButteraugliComparator> result = |
1803 | 0 | std::unique_ptr<ButteraugliComparator>( |
1804 | 0 | new ButteraugliComparator(xsize, ysize, params)); |
1805 | 0 | JXL_ASSIGN_OR_RETURN(result->temp_, |
1806 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1807 | |
|
1808 | 0 | if (xsize < 8 || ysize < 8) { |
1809 | 0 | return result; |
1810 | 0 | } |
1811 | | |
1812 | 0 | JXL_ASSIGN_OR_RETURN(Image3F xyb0, |
1813 | 0 | Image3F::Create(memory_manager, xsize, ysize)); |
1814 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( |
1815 | 0 | rgb0, params, result->Temp(), &result->blur_temp_, &xyb0)); |
1816 | 0 | result->ReleaseTemp(); |
1817 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)( |
1818 | 0 | xsize, ysize, params, &result->blur_temp_, xyb0, result->pi0_)); |
1819 | | |
1820 | | // Awful recursive construction of samples of different resolution. |
1821 | | // This is an after-thought and possibly somewhat parallel in |
1822 | | // functionality with the PsychoImage multi-resolution approach. |
1823 | 0 | JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb0, SubSample2x(rgb0)); |
1824 | 0 | JXL_ASSIGN_OR_RETURN(result->sub_, |
1825 | 0 | ButteraugliComparator::Make(subsampledRgb0, params)); |
1826 | 0 | return result; |
1827 | 0 | } |
1828 | | |
1829 | 0 | Status ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const { |
1830 | 0 | return HWY_DYNAMIC_DISPATCH(MaskPsychoImage)( |
1831 | 0 | pi0_, pi0_, xsize_, ysize_, params_, &blur_temp_, mask, nullptr); |
1832 | 0 | } |
1833 | | |
1834 | | Status ButteraugliComparator::Diffmap(const Image3F& rgb1, |
1835 | 0 | ImageF& result) const { |
1836 | 0 | JxlMemoryManager* memory_manager = rgb1.memory_manager(); |
1837 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1838 | 0 | ZeroFillImage(&result); |
1839 | 0 | return true; |
1840 | 0 | } |
1841 | 0 | JXL_ASSIGN_OR_RETURN(Image3F xyb1, |
1842 | 0 | Image3F::Create(memory_manager, xsize_, ysize_)); |
1843 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( |
1844 | 0 | rgb1, params_, Temp(), &blur_temp_, &xyb1)); |
1845 | 0 | ReleaseTemp(); |
1846 | 0 | JXL_RETURN_IF_ERROR(DiffmapOpsinDynamicsImage(xyb1, result)); |
1847 | 0 | if (sub_) { |
1848 | 0 | if (sub_->xsize_ < 8 || sub_->ysize_ < 8) { |
1849 | 0 | return true; |
1850 | 0 | } |
1851 | 0 | JXL_ASSIGN_OR_RETURN( |
1852 | 0 | Image3F sub_xyb, |
1853 | 0 | Image3F::Create(memory_manager, sub_->xsize_, sub_->ysize_)); |
1854 | 0 | JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb1, SubSample2x(rgb1)); |
1855 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( |
1856 | 0 | subsampledRgb1, params_, sub_->Temp(), &sub_->blur_temp_, &sub_xyb)); |
1857 | 0 | sub_->ReleaseTemp(); |
1858 | 0 | ImageF subresult; |
1859 | 0 | JXL_RETURN_IF_ERROR(sub_->DiffmapOpsinDynamicsImage(sub_xyb, subresult)); |
1860 | 0 | AddSupersampled2x(subresult, 0.5, result); |
1861 | 0 | } |
1862 | 0 | return true; |
1863 | 0 | } |
1864 | | |
1865 | | Status ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1, |
1866 | 0 | ImageF& result) const { |
1867 | 0 | JxlMemoryManager* memory_manager = xyb1.memory_manager(); |
1868 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1869 | 0 | ZeroFillImage(&result); |
1870 | 0 | return true; |
1871 | 0 | } |
1872 | 0 | PsychoImage pi1; |
1873 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)( |
1874 | 0 | xsize_, ysize_, params_, &blur_temp_, xyb1, pi1)); |
1875 | 0 | JXL_ASSIGN_OR_RETURN(result, ImageF::Create(memory_manager, xsize_, ysize_)); |
1876 | 0 | return DiffmapPsychoImage(pi1, result); |
1877 | 0 | } |
1878 | | |
1879 | | namespace { |
1880 | | |
1881 | | Status MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, |
1882 | | const double w_0lt1, const double norm1, |
1883 | | ImageF* HWY_RESTRICT diffs, |
1884 | 0 | Image3F* HWY_RESTRICT block_diff_ac, size_t c) { |
1885 | 0 | return HWY_DYNAMIC_DISPATCH(MaltaDiffMap)(lum0, lum1, w_0gt1, w_0lt1, norm1, |
1886 | 0 | diffs, &block_diff_ac->Plane(c)); |
1887 | 0 | } |
1888 | | |
1889 | | Status MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, |
1890 | | const double w_0gt1, const double w_0lt1, |
1891 | | const double norm1, ImageF* HWY_RESTRICT diffs, |
1892 | 0 | Image3F* HWY_RESTRICT block_diff_ac, size_t c) { |
1893 | 0 | return HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF)(lum0, lum1, w_0gt1, w_0lt1, norm1, |
1894 | 0 | diffs, &block_diff_ac->Plane(c)); |
1895 | 0 | } |
1896 | | |
1897 | | } // namespace |
1898 | | |
1899 | | Status ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1, |
1900 | 0 | ImageF& diffmap) const { |
1901 | 0 | JxlMemoryManager* memory_manager = diffmap.memory_manager(); |
1902 | 0 | if (xsize_ < 8 || ysize_ < 8) { |
1903 | 0 | ZeroFillImage(&diffmap); |
1904 | 0 | return true; |
1905 | 0 | } |
1906 | | |
1907 | 0 | const float hf_asymmetry_ = params_.hf_asymmetry; |
1908 | 0 | const float xmul_ = params_.xmul; |
1909 | |
|
1910 | 0 | JXL_ASSIGN_OR_RETURN(ImageF diffs, |
1911 | 0 | ImageF::Create(memory_manager, xsize_, ysize_)); |
1912 | 0 | JXL_ASSIGN_OR_RETURN(Image3F block_diff_ac, |
1913 | 0 | Image3F::Create(memory_manager, xsize_, ysize_)); |
1914 | 0 | ZeroFillImage(&block_diff_ac); |
1915 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMap( |
1916 | 0 | pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_, |
1917 | 0 | wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1)); |
1918 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMap( |
1919 | 0 | pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_, |
1920 | 0 | wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac, 0)); |
1921 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF( |
1922 | 0 | pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_), |
1923 | 0 | wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs, &block_diff_ac, 1)); |
1924 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], |
1925 | 0 | wHfMaltaX * std::sqrt(hf_asymmetry_), |
1926 | 0 | wHfMaltaX / std::sqrt(hf_asymmetry_), |
1927 | 0 | norm1HfX, &diffs, &block_diff_ac, 0)); |
1928 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), |
1929 | 0 | wMfMalta, wMfMalta, norm1Mf, &diffs, |
1930 | 0 | &block_diff_ac, 1)); |
1931 | 0 | JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), |
1932 | 0 | wMfMaltaX, wMfMaltaX, norm1MfX, &diffs, |
1933 | 0 | &block_diff_ac, 0)); |
1934 | | |
1935 | 0 | JXL_ASSIGN_OR_RETURN(Image3F block_diff_dc, |
1936 | 0 | Image3F::Create(memory_manager, xsize_, ysize_)); |
1937 | 0 | for (size_t c = 0; c < 3; ++c) { |
1938 | 0 | if (c < 2) { // No blue channel error accumulated at HF. |
1939 | 0 | HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric) |
1940 | 0 | (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_, |
1941 | 0 | &block_diff_ac.Plane(c)); |
1942 | 0 | } |
1943 | 0 | HWY_DYNAMIC_DISPATCH(L2Diff) |
1944 | 0 | (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac.Plane(c)); |
1945 | 0 | HWY_DYNAMIC_DISPATCH(SetL2Diff) |
1946 | 0 | (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc.Plane(c)); |
1947 | 0 | } |
1948 | |
|
1949 | 0 | ImageF mask; |
1950 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(MaskPsychoImage)( |
1951 | 0 | pi0_, pi1, xsize_, ysize_, params_, &blur_temp_, &mask, |
1952 | 0 | &block_diff_ac.Plane(1))); |
1953 | | |
1954 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap)( |
1955 | 0 | mask, block_diff_dc, block_diff_ac, xmul_, &diffmap)); |
1956 | 0 | return true; |
1957 | 0 | } |
1958 | | |
1959 | | double ButteraugliScoreFromDiffmap(const ImageF& diffmap, |
1960 | 0 | const ButteraugliParams* params) { |
1961 | 0 | float retval = 0.0f; |
1962 | 0 | for (size_t y = 0; y < diffmap.ysize(); ++y) { |
1963 | 0 | const float* BUTTERAUGLI_RESTRICT row = diffmap.ConstRow(y); |
1964 | 0 | for (size_t x = 0; x < diffmap.xsize(); ++x) { |
1965 | 0 | retval = std::max(retval, row[x]); |
1966 | 0 | } |
1967 | 0 | } |
1968 | 0 | return retval; |
1969 | 0 | } |
1970 | | |
1971 | | Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, |
1972 | 0 | double hf_asymmetry, double xmul, ImageF& diffmap) { |
1973 | 0 | ButteraugliParams params; |
1974 | 0 | params.hf_asymmetry = hf_asymmetry; |
1975 | 0 | params.xmul = xmul; |
1976 | 0 | return ButteraugliDiffmap(rgb0, rgb1, params, diffmap); |
1977 | 0 | } |
1978 | | |
1979 | | template <size_t kMax> |
1980 | | bool ButteraugliDiffmapSmall(const Image3F& rgb0, const Image3F& rgb1, |
1981 | 0 | const ButteraugliParams& params, ImageF& diffmap) { |
1982 | 0 | const size_t xsize = rgb0.xsize(); |
1983 | 0 | const size_t ysize = rgb0.ysize(); |
1984 | 0 | JxlMemoryManager* memory_manager = rgb0.memory_manager(); |
1985 | | // Butteraugli values for small (where xsize or ysize is smaller |
1986 | | // than 8 pixels) images are non-sensical, but most likely it is |
1987 | | // less disruptive to try to compute something than just give up. |
1988 | | // Temporarily extend the borders of the image to fit 8 x 8 size. |
1989 | 0 | size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0; |
1990 | 0 | size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0; |
1991 | 0 | size_t xscaled = std::max<size_t>(kMax, xsize); |
1992 | 0 | size_t yscaled = std::max<size_t>(kMax, ysize); |
1993 | 0 | JXL_ASSIGN_OR_RETURN(Image3F scaled0, |
1994 | 0 | Image3F::Create(memory_manager, xscaled, yscaled)); |
1995 | 0 | JXL_ASSIGN_OR_RETURN(Image3F scaled1, |
1996 | 0 | Image3F::Create(memory_manager, xscaled, yscaled)); |
1997 | 0 | for (int i = 0; i < 3; ++i) { |
1998 | 0 | for (size_t y = 0; y < yscaled; ++y) { |
1999 | 0 | for (size_t x = 0; x < xscaled; ++x) { |
2000 | 0 | size_t x2 = std::min<size_t>(xsize - 1, x > xborder ? x - xborder : 0); |
2001 | 0 | size_t y2 = std::min<size_t>(ysize - 1, y > yborder ? y - yborder : 0); |
2002 | 0 | scaled0.PlaneRow(i, y)[x] = rgb0.PlaneRow(i, y2)[x2]; |
2003 | 0 | scaled1.PlaneRow(i, y)[x] = rgb1.PlaneRow(i, y2)[x2]; |
2004 | 0 | } |
2005 | 0 | } |
2006 | 0 | } |
2007 | 0 | ImageF diffmap_scaled; |
2008 | 0 | const bool ok = ButteraugliDiffmap(scaled0, scaled1, params, diffmap_scaled); |
2009 | 0 | JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(memory_manager, xsize, ysize)); |
2010 | 0 | for (size_t y = 0; y < ysize; ++y) { |
2011 | 0 | for (size_t x = 0; x < xsize; ++x) { |
2012 | 0 | diffmap.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder]; |
2013 | 0 | } |
2014 | 0 | } |
2015 | 0 | return ok; |
2016 | 0 | } |
2017 | | |
2018 | | Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, |
2019 | 0 | const ButteraugliParams& params, ImageF& diffmap) { |
2020 | 0 | const size_t xsize = rgb0.xsize(); |
2021 | 0 | const size_t ysize = rgb0.ysize(); |
2022 | 0 | if (xsize < 1 || ysize < 1) { |
2023 | 0 | return JXL_FAILURE("Zero-sized image"); |
2024 | 0 | } |
2025 | 0 | if (!SameSize(rgb0, rgb1)) { |
2026 | 0 | return JXL_FAILURE("Size mismatch"); |
2027 | 0 | } |
2028 | 0 | static const int kMax = 8; |
2029 | 0 | if (xsize < kMax || ysize < kMax) { |
2030 | 0 | return ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap); |
2031 | 0 | } |
2032 | 0 | JXL_ASSIGN_OR_RETURN(std::unique_ptr<ButteraugliComparator> butteraugli, |
2033 | 0 | ButteraugliComparator::Make(rgb0, params)); |
2034 | 0 | JXL_RETURN_IF_ERROR(butteraugli->Diffmap(rgb1, diffmap)); |
2035 | 0 | return true; |
2036 | 0 | } |
2037 | | |
2038 | | bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, |
2039 | | float hf_asymmetry, float xmul, ImageF& diffmap, |
2040 | 0 | double& diffvalue) { |
2041 | 0 | ButteraugliParams params; |
2042 | 0 | params.hf_asymmetry = hf_asymmetry; |
2043 | 0 | params.xmul = xmul; |
2044 | 0 | return ButteraugliInterface(rgb0, rgb1, params, diffmap, diffvalue); |
2045 | 0 | } |
2046 | | |
2047 | | bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, |
2048 | | const ButteraugliParams& params, ImageF& diffmap, |
2049 | 0 | double& diffvalue) { |
2050 | 0 | if (!ButteraugliDiffmap(rgb0, rgb1, params, diffmap)) { |
2051 | 0 | return false; |
2052 | 0 | } |
2053 | 0 | diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); |
2054 | 0 | return true; |
2055 | 0 | } |
2056 | | |
2057 | | Status ButteraugliInterfaceInPlace(Image3F&& rgb0, Image3F&& rgb1, |
2058 | | const ButteraugliParams& params, |
2059 | 0 | ImageF& diffmap, double& diffvalue) { |
2060 | 0 | const size_t xsize = rgb0.xsize(); |
2061 | 0 | const size_t ysize = rgb0.ysize(); |
2062 | 0 | if (xsize < 1 || ysize < 1) { |
2063 | 0 | return JXL_FAILURE("Zero-sized image"); |
2064 | 0 | } |
2065 | 0 | if (!SameSize(rgb0, rgb1)) { |
2066 | 0 | return JXL_FAILURE("Size mismatch"); |
2067 | 0 | } |
2068 | 0 | static const int kMax = 8; |
2069 | 0 | if (xsize < kMax || ysize < kMax) { |
2070 | 0 | bool ok = ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap); |
2071 | 0 | diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); |
2072 | 0 | return ok; |
2073 | 0 | } |
2074 | 0 | ImageF subdiffmap; |
2075 | 0 | if (xsize >= 15 && ysize >= 15) { |
2076 | 0 | JXL_ASSIGN_OR_RETURN(Image3F rgb0_sub, SubSample2x(rgb0)); |
2077 | 0 | JXL_ASSIGN_OR_RETURN(Image3F rgb1_sub, SubSample2x(rgb1)); |
2078 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)( |
2079 | 0 | rgb0_sub, rgb1_sub, params, subdiffmap)); |
2080 | 0 | } |
2081 | 0 | JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)( |
2082 | 0 | rgb0, rgb1, params, diffmap)); |
2083 | 0 | if (xsize >= 15 && ysize >= 15) { |
2084 | 0 | AddSupersampled2x(subdiffmap, 0.5, diffmap); |
2085 | 0 | } |
2086 | 0 | diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); |
2087 | 0 | return true; |
2088 | 0 | } |
2089 | | |
2090 | 0 | double ButteraugliFuzzyClass(double score) { |
2091 | 0 | static const double fuzzy_width_up = 4.8; |
2092 | 0 | static const double fuzzy_width_down = 4.8; |
2093 | 0 | static const double m0 = 2.0; |
2094 | 0 | static const double scaler = 0.7777; |
2095 | 0 | double val; |
2096 | 0 | if (score < 1.0) { |
2097 | | // val in [scaler .. 2.0] |
2098 | 0 | val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_down)); |
2099 | 0 | val -= 1.0; // from [1 .. 2] to [0 .. 1] |
2100 | 0 | val *= 2.0 - scaler; // from [0 .. 1] to [0 .. 2.0 - scaler] |
2101 | 0 | val += scaler; // from [0 .. 2.0 - scaler] to [scaler .. 2.0] |
2102 | 0 | } else { |
2103 | | // val in [0 .. scaler] |
2104 | 0 | val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_up)); |
2105 | 0 | val *= scaler; |
2106 | 0 | } |
2107 | 0 | return val; |
2108 | 0 | } |
2109 | | |
2110 | | // #define PRINT_OUT_NORMALIZATION |
2111 | | |
2112 | 0 | double ButteraugliFuzzyInverse(double seek) { |
2113 | 0 | double pos = 0; |
2114 | | // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter) |
2115 | 0 | for (double range = 1.0; range >= 1e-10; range *= 0.5) { |
2116 | 0 | double cur = ButteraugliFuzzyClass(pos); |
2117 | 0 | if (cur < seek) { |
2118 | 0 | pos -= range; |
2119 | 0 | } else { |
2120 | 0 | pos += range; |
2121 | 0 | } |
2122 | 0 | } |
2123 | | #ifdef PRINT_OUT_NORMALIZATION |
2124 | | if (seek == 1.0) { |
2125 | | fprintf(stderr, "Fuzzy inverse %g\n", pos); |
2126 | | } |
2127 | | #endif |
2128 | 0 | return pos; |
2129 | 0 | } |
2130 | | |
2131 | | #ifdef PRINT_OUT_NORMALIZATION |
2132 | | static double print_out_normalization = ButteraugliFuzzyInverse(1.0); |
2133 | | #endif |
2134 | | |
2135 | | namespace { |
2136 | | |
2137 | | void ScoreToRgb(double score, double good_threshold, double bad_threshold, |
2138 | 0 | float rgb[3]) { |
2139 | 0 | double heatmap[12][3] = { |
2140 | 0 | {0, 0, 0}, {0, 0, 1}, |
2141 | 0 | {0, 1, 1}, {0, 1, 0}, // Good level |
2142 | 0 | {1, 1, 0}, {1, 0, 0}, // Bad level |
2143 | 0 | {1, 0, 1}, {0.5, 0.5, 1.0}, |
2144 | 0 | {1.0, 0.5, 0.5}, // Pastel colors for the very bad quality range. |
2145 | 0 | {1.0, 1.0, 0.5}, {1, 1, 1}, |
2146 | 0 | {1, 1, 1}, // Last color repeated to have a solid range of white. |
2147 | 0 | }; |
2148 | 0 | if (score < good_threshold) { |
2149 | 0 | score = (score / good_threshold) * 0.3; |
2150 | 0 | } else if (score < bad_threshold) { |
2151 | 0 | score = 0.3 + |
2152 | 0 | (score - good_threshold) / (bad_threshold - good_threshold) * 0.15; |
2153 | 0 | } else { |
2154 | 0 | score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5; |
2155 | 0 | } |
2156 | 0 | static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]); |
2157 | 0 | score = std::min<double>(std::max<double>(score * (kTableSize - 1), 0.0), |
2158 | 0 | kTableSize - 2); |
2159 | 0 | int ix = static_cast<int>(score); |
2160 | 0 | ix = std::min(std::max(0, ix), kTableSize - 2); // Handle NaN |
2161 | 0 | double mix = score - ix; |
2162 | 0 | for (int i = 0; i < 3; ++i) { |
2163 | 0 | double v = mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i]; |
2164 | 0 | rgb[i] = pow(v, 0.5); |
2165 | 0 | } |
2166 | 0 | } |
2167 | | |
2168 | | } // namespace |
2169 | | |
2170 | | StatusOr<Image3F> CreateHeatMapImage(const ImageF& distmap, |
2171 | | double good_threshold, |
2172 | 0 | double bad_threshold) { |
2173 | 0 | JxlMemoryManager* memory_manager = distmap.memory_manager(); |
2174 | 0 | JXL_ASSIGN_OR_RETURN( |
2175 | 0 | Image3F heatmap, |
2176 | 0 | Image3F::Create(memory_manager, distmap.xsize(), distmap.ysize())); |
2177 | 0 | for (size_t y = 0; y < distmap.ysize(); ++y) { |
2178 | 0 | const float* BUTTERAUGLI_RESTRICT row_distmap = distmap.ConstRow(y); |
2179 | 0 | float* BUTTERAUGLI_RESTRICT row_h0 = heatmap.PlaneRow(0, y); |
2180 | 0 | float* BUTTERAUGLI_RESTRICT row_h1 = heatmap.PlaneRow(1, y); |
2181 | 0 | float* BUTTERAUGLI_RESTRICT row_h2 = heatmap.PlaneRow(2, y); |
2182 | 0 | for (size_t x = 0; x < distmap.xsize(); ++x) { |
2183 | 0 | const float d = row_distmap[x]; |
2184 | 0 | float rgb[3]; |
2185 | 0 | ScoreToRgb(d, good_threshold, bad_threshold, rgb); |
2186 | 0 | row_h0[x] = rgb[0]; |
2187 | 0 | row_h1[x] = rgb[1]; |
2188 | 0 | row_h2[x] = rgb[2]; |
2189 | 0 | } |
2190 | 0 | } |
2191 | 0 | return heatmap; |
2192 | 0 | } |
2193 | | |
2194 | | } // namespace jxl |
2195 | | #endif // HWY_ONCE |