Coverage Report

Created: 2025-06-16 07:00

/src/libjxl/lib/jxl/butteraugli/butteraugli.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
//
6
// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
7
//
8
// The physical architecture of butteraugli is based on the following naming
9
// convention:
10
//   * Opsin - dynamics of the photosensitive chemicals in the retina
11
//             with their immediate electrical processing
12
//   * Xyb - hybrid opponent/trichromatic color space
13
//     x is roughly red-subtract-green.
14
//     y is yellow.
15
//     b is blue.
16
//     Xyb values are computed from Opsin mixing, not directly from rgb.
17
//   * Mask - for visual masking
18
//   * Hf - color modeling for spatially high-frequency features
19
//   * Lf - color modeling for spatially low-frequency features
20
//   * Diffmap - to cluster and build an image of error between the images
21
//   * Blur - to hold the smoothing code
22
23
#include "lib/jxl/butteraugli/butteraugli.h"
24
25
#include <jxl/memory_manager.h>
26
27
#include <algorithm>
28
#include <atomic>
29
#include <cmath>
30
#include <cstdint>
31
#include <cstdio>
32
#include <cstdlib>
33
#include <cstring>
34
#include <memory>
35
36
#include "lib/jxl/base/common.h"
37
#include "lib/jxl/base/compiler_specific.h"
38
#include "lib/jxl/base/rect.h"
39
#include "lib/jxl/convolve.h"
40
#include "lib/jxl/image.h"
41
42
#undef HWY_TARGET_INCLUDE
43
#define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc"
44
#include <hwy/foreach_target.h>
45
46
#include "lib/jxl/base/fast_math-inl.h"
47
#include "lib/jxl/base/status.h"
48
#include "lib/jxl/image_ops.h"
49
50
#if BUTTERAUGLI_ENABLE_CHECKS
51
#include "lib/jxl/base/printf_macros.h"
52
#endif
53
54
#ifndef JXL_BUTTERAUGLI_ONCE
55
#define JXL_BUTTERAUGLI_ONCE
56
57
namespace jxl {
58
59
static const double wMfMalta = 37.0819870399;
60
static const double norm1Mf = 130262059.556;
61
static const double wMfMaltaX = 8246.75321353;
62
static const double norm1MfX = 1009002.70582;
63
static const double wHfMalta = 18.7237414387;
64
static const double norm1Hf = 4498534.45232;
65
static const double wHfMaltaX = 6923.99476109;
66
static const double norm1HfX = 8051.15833247;
67
static const double wUhfMalta = 1.10039032555;
68
static const double norm1Uhf = 71.7800275169;
69
static const double wUhfMaltaX = 173.5;
70
static const double norm1UhfX = 5.0;
71
static const double wmul[9] = {
72
    400.0,         1.50815703118,  0,
73
    2150.0,        10.6195433239,  16.2176043152,
74
    29.2353797994, 0.844626970982, 0.703646627719,
75
};
76
77
0
std::vector<float> ComputeKernel(float sigma) {
78
0
  const float m = 2.25;  // Accuracy increases when m is increased.
79
0
  const double scaler = -1.0 / (2.0 * sigma * sigma);
80
0
  const int diff = std::max<int>(1, m * std::fabs(sigma));
81
0
  std::vector<float> kernel(2 * diff + 1);
82
0
  for (int i = -diff; i <= diff; ++i) {
83
0
    kernel[i + diff] = std::exp(scaler * i * i);
84
0
  }
85
0
  return kernel;
86
0
}
87
88
void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel,
89
0
                          const size_t x, float* BUTTERAUGLI_RESTRICT row_out) {
90
0
  const size_t offset = kernel.size() / 2;
91
0
  int minx = x < offset ? 0 : x - offset;
92
0
  int maxx = std::min<int>(in.xsize() - 1, x + offset);
93
0
  float weight = 0.0f;
94
0
  for (int j = minx; j <= maxx; ++j) {
95
0
    weight += kernel[j - x + offset];
96
0
  }
97
0
  float scale = 1.0f / weight;
98
0
  for (size_t y = 0; y < in.ysize(); ++y) {
99
0
    const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y);
100
0
    float sum = 0.0f;
101
0
    for (int j = minx; j <= maxx; ++j) {
102
0
      sum += row_in[j] * kernel[j - x + offset];
103
0
    }
104
0
    row_out[y] = sum * scale;
105
0
  }
106
0
}
107
108
// Computes a horizontal convolution and transposes the result.
109
Status ConvolutionWithTranspose(const ImageF& in,
110
                                const std::vector<float>& kernel,
111
0
                                ImageF* BUTTERAUGLI_RESTRICT out) {
112
0
  JXL_ENSURE(out->xsize() == in.ysize());
113
0
  JXL_ENSURE(out->ysize() == in.xsize());
114
0
  const size_t len = kernel.size();
115
0
  const size_t offset = len / 2;
116
0
  float weight_no_border = 0.0f;
117
0
  for (size_t j = 0; j < len; ++j) {
118
0
    weight_no_border += kernel[j];
119
0
  }
120
0
  const float scale_no_border = 1.0f / weight_no_border;
121
0
  const size_t border1 = std::min(in.xsize(), offset);
122
0
  const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0;
123
0
  std::vector<float> scaled_kernel(len / 2 + 1);
124
0
  for (size_t i = 0; i <= len / 2; ++i) {
125
0
    scaled_kernel[i] = kernel[i] * scale_no_border;
126
0
  }
127
128
  // middle
129
0
  switch (len) {
130
0
    case 7: {
131
0
      const float sk0 = scaled_kernel[0];
132
0
      const float sk1 = scaled_kernel[1];
133
0
      const float sk2 = scaled_kernel[2];
134
0
      const float sk3 = scaled_kernel[3];
135
0
      for (size_t y = 0; y < in.ysize(); ++y) {
136
0
        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
137
0
        for (size_t x = border1; x < border2; ++x, ++row_in) {
138
0
          const float sum0 = (row_in[0] + row_in[6]) * sk0;
139
0
          const float sum1 = (row_in[1] + row_in[5]) * sk1;
140
0
          const float sum2 = (row_in[2] + row_in[4]) * sk2;
141
0
          const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2;
142
0
          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
143
0
          row_out[y] = sum;
144
0
        }
145
0
      }
146
0
    } break;
147
0
    case 13: {
148
0
      for (size_t y = 0; y < in.ysize(); ++y) {
149
0
        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
150
0
        for (size_t x = border1; x < border2; ++x, ++row_in) {
151
0
          float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0];
152
0
          float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1];
153
0
          float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2];
154
0
          float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3];
155
0
          sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4];
156
0
          sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5];
157
0
          const float sum = (row_in[6]) * scaled_kernel[6];
158
0
          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
159
0
          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
160
0
        }
161
0
      }
162
0
      break;
163
0
    }
164
0
    case 15: {
165
0
      for (size_t y = 0; y < in.ysize(); ++y) {
166
0
        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
167
0
        for (size_t x = border1; x < border2; ++x, ++row_in) {
168
0
          float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0];
169
0
          float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1];
170
0
          float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2];
171
0
          float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3];
172
0
          sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4];
173
0
          sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5];
174
0
          sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6];
175
0
          const float sum = (row_in[7]) * scaled_kernel[7];
176
0
          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
177
0
          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
178
0
        }
179
0
      }
180
0
      break;
181
0
    }
182
0
    case 33: {
183
0
      for (size_t y = 0; y < in.ysize(); ++y) {
184
0
        const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
185
0
        for (size_t x = border1; x < border2; ++x, ++row_in) {
186
0
          float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0];
187
0
          float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1];
188
0
          float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2];
189
0
          float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3];
190
0
          sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4];
191
0
          sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5];
192
0
          sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6];
193
0
          sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7];
194
0
          sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8];
195
0
          sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9];
196
0
          sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10];
197
0
          sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11];
198
0
          sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12];
199
0
          sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13];
200
0
          sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14];
201
0
          sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15];
202
0
          const float sum = (row_in[16]) * scaled_kernel[16];
203
0
          float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
204
0
          row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
205
0
        }
206
0
      }
207
0
      break;
208
0
    }
209
0
    default:
210
0
      return JXL_UNREACHABLE("kernel size %d not implemented",
211
0
                             static_cast<int>(len));
212
0
  }
213
  // left border
214
0
  for (size_t x = 0; x < border1; ++x) {
215
0
    ConvolveBorderColumn(in, kernel, x, out->Row(x));
216
0
  }
217
218
  // right border
219
0
  for (size_t x = border2; x < in.xsize(); ++x) {
220
0
    ConvolveBorderColumn(in, kernel, x, out->Row(x));
221
0
  }
222
0
  return true;
223
0
}
224
225
// A blur somewhat similar to a 2D Gaussian blur.
226
// See: https://en.wikipedia.org/wiki/Gaussian_blur
227
//
228
// This is a bottleneck because the sigma can be quite large (>7). We can use
229
// gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated
230
// Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are
231
// zero-valued. This leads to noticeable differences at the edges of diffmaps.
232
// We retain a special case for 5x5 kernels (even faster than gauss_blur),
233
// optionally use gauss_blur followed by fixup of the borders for large images,
234
// or fall back to the previous truncated FIR followed by a transpose.
235
Status Blur(const ImageF& in, float sigma, const ButteraugliParams& params,
236
0
            BlurTemp* temp, ImageF* out) {
237
0
  std::vector<float> kernel = ComputeKernel(sigma);
238
  // Separable5 does an in-place convolution, so this fast path is not safe if
239
  // in aliases out.
240
0
  if (kernel.size() == 5 && &in != out) {
241
0
    float sum_weights = 0.0f;
242
0
    for (const float w : kernel) {
243
0
      sum_weights += w;
244
0
    }
245
0
    const float scale = 1.0f / sum_weights;
246
0
    const float w0 = kernel[2] * scale;
247
0
    const float w1 = kernel[1] * scale;
248
0
    const float w2 = kernel[0] * scale;
249
0
    const WeightsSeparable5 weights = {
250
0
        {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
251
0
        {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
252
0
    };
253
0
    JXL_RETURN_IF_ERROR(
254
0
        Separable5(in, Rect(in), weights, /*pool=*/nullptr, out));
255
0
    return true;
256
0
  }
257
258
0
  ImageF* temp_t;
259
0
  JXL_RETURN_IF_ERROR(temp->GetTransposed(in, &temp_t));
260
0
  JXL_RETURN_IF_ERROR(ConvolutionWithTranspose(in, kernel, temp_t));
261
0
  JXL_RETURN_IF_ERROR(ConvolutionWithTranspose(*temp_t, kernel, out));
262
0
  return true;
263
0
}
264
265
// Allows PaddedMaltaUnit to call either function via overloading.
266
struct MaltaTagLF {};
267
struct MaltaTag {};
268
269
}  // namespace jxl
270
271
#endif  // JXL_BUTTERAUGLI_ONCE
272
273
#include <hwy/highway.h>
274
HWY_BEFORE_NAMESPACE();
275
namespace jxl {
276
namespace HWY_NAMESPACE {
277
278
// These templates are not found via ADL.
279
using hwy::HWY_NAMESPACE::Abs;
280
using hwy::HWY_NAMESPACE::Div;
281
using hwy::HWY_NAMESPACE::Gt;
282
using hwy::HWY_NAMESPACE::IfThenElse;
283
using hwy::HWY_NAMESPACE::IfThenElseZero;
284
using hwy::HWY_NAMESPACE::Lt;
285
using hwy::HWY_NAMESPACE::Max;
286
using hwy::HWY_NAMESPACE::Mul;
287
using hwy::HWY_NAMESPACE::MulAdd;
288
using hwy::HWY_NAMESPACE::MulSub;
289
using hwy::HWY_NAMESPACE::Neg;
290
using hwy::HWY_NAMESPACE::Sub;
291
using hwy::HWY_NAMESPACE::Vec;
292
using hwy::HWY_NAMESPACE::ZeroIfNegative;
293
294
template <class D, class V>
295
0
HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) {
296
0
  static const double kMul = 0.724216145665;
297
0
  const V mul = Set(d, kMul);
298
0
  const V maxval = Set(d, kMaxVal);
299
  // If greater than maxval or less than -maxval, replace with if_*.
300
0
  const V if_pos = MulAdd(Sub(v, maxval), mul, maxval);
301
0
  const V if_neg = MulSub(Add(v, maxval), mul, maxval);
302
0
  const V pos_or_v = IfThenElse(Ge(v, maxval), if_pos, v);
303
0
  return IfThenElse(Lt(v, Neg(maxval)), if_neg, pos_or_v);
304
0
}
Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::MaximumClamp<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>, double)
Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::MaximumClamp<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>, double)
Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::MaximumClamp<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>, double)
305
306
// Make area around zero less important (remove it).
307
template <class D, class V>
308
0
HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) {
309
0
  const auto w = Set(d, kw);
310
0
  return IfThenElse(Gt(x, w), Sub(x, w),
311
0
                    IfThenElseZero(Lt(x, Neg(w)), Add(x, w)));
312
0
}
Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::RemoveRangeAroundZero<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, double, hwy::N_SSE4::Vec128<float, 4ul>)
Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::RemoveRangeAroundZero<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, double, hwy::N_AVX2::Vec256<float>)
Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::RemoveRangeAroundZero<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, double, hwy::N_SSE2::Vec128<float, 4ul>)
313
314
// Make area around zero more important (2x it until the limit).
315
template <class D, class V>
316
0
HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) {
317
0
  const auto w = Set(d, kw);
318
0
  return IfThenElse(Gt(x, w), Add(x, w),
319
0
                    IfThenElse(Lt(x, Neg(w)), Sub(x, w), Add(x, x)));
320
0
}
Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::AmplifyRangeAroundZero<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, double, hwy::N_SSE4::Vec128<float, 4ul>)
Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::AmplifyRangeAroundZero<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, double, hwy::N_AVX2::Vec256<float>)
Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::AmplifyRangeAroundZero<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, double, hwy::N_SSE2::Vec128<float, 4ul>)
321
322
// XybLowFreqToVals converts from low-frequency XYB space to the 'vals' space.
323
// Vals space can be converted to L2-norm space (Euclidean and normalized)
324
// through visual masking.
325
template <class D, class V>
326
HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y,
327
                                 const V& b_arg, V* HWY_RESTRICT valx,
328
0
                                 V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) {
329
0
  static const double xmul_scalar = 33.832837186260;
330
0
  static const double ymul_scalar = 14.458268100570;
331
0
  static const double bmul_scalar = 49.87984651440;
332
0
  static const double y_to_b_mul_scalar = -0.362267051518;
333
0
  const V xmul = Set(d, xmul_scalar);
334
0
  const V ymul = Set(d, ymul_scalar);
335
0
  const V bmul = Set(d, bmul_scalar);
336
0
  const V y_to_b_mul = Set(d, y_to_b_mul_scalar);
337
0
  const V b = MulAdd(y_to_b_mul, y, b_arg);
338
0
  *valb = Mul(b, bmul);
339
0
  *valx = Mul(x, xmul);
340
0
  *valy = Mul(y, ymul);
341
0
}
Unexecuted instantiation: void jxl::N_SSE4::XybLowFreqToVals<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*)
Unexecuted instantiation: void jxl::N_AVX2::XybLowFreqToVals<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*)
Unexecuted instantiation: void jxl::N_SSE2::XybLowFreqToVals<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*)
342
343
0
void XybLowFreqToVals(Image3F* xyb_lf) {
344
  // Modify range around zero code only concerns the high frequency
345
  // planes and only the X and Y channels.
346
  // Convert low freq xyb to vals space so that we can do a simple squared sum
347
  // diff on the low frequencies later.
348
0
  const HWY_FULL(float) d;
349
0
  for (size_t y = 0; y < xyb_lf->ysize(); ++y) {
350
0
    float* BUTTERAUGLI_RESTRICT row_x = xyb_lf->PlaneRow(0, y);
351
0
    float* BUTTERAUGLI_RESTRICT row_y = xyb_lf->PlaneRow(1, y);
352
0
    float* BUTTERAUGLI_RESTRICT row_b = xyb_lf->PlaneRow(2, y);
353
0
    for (size_t x = 0; x < xyb_lf->xsize(); x += Lanes(d)) {
354
0
      auto valx = Undefined(d);
355
0
      auto valy = Undefined(d);
356
0
      auto valb = Undefined(d);
357
0
      XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x),
358
0
                       Load(d, row_b + x), &valx, &valy, &valb);
359
0
      Store(valx, d, row_x + x);
360
0
      Store(valy, d, row_y + x);
361
0
      Store(valb, d, row_b + x);
362
0
    }
363
0
  }
364
0
}
Unexecuted instantiation: jxl::N_SSE4::XybLowFreqToVals(jxl::Image3<float>*)
Unexecuted instantiation: jxl::N_AVX2::XybLowFreqToVals(jxl::Image3<float>*)
Unexecuted instantiation: jxl::N_SSE2::XybLowFreqToVals(jxl::Image3<float>*)
365
366
0
Status SuppressXByY(const ImageF& in_y, ImageF* HWY_RESTRICT inout_x) {
367
0
  JXL_ENSURE(SameSize(*inout_x, in_y));
368
0
  const size_t xsize = in_y.xsize();
369
0
  const size_t ysize = in_y.ysize();
370
0
  const HWY_FULL(float) d;
371
0
  static const double suppress = 46.0;
372
0
  static const double s = 0.653020556257;
373
0
  const auto sv = Set(d, s);
374
0
  const auto one_minus_s = Set(d, 1.0 - s);
375
0
  const auto ywv = Set(d, suppress);
376
377
0
  for (size_t y = 0; y < ysize; ++y) {
378
0
    const float* HWY_RESTRICT row_y = in_y.ConstRow(y);
379
0
    float* HWY_RESTRICT row_x = inout_x->Row(y);
380
0
    for (size_t x = 0; x < xsize; x += Lanes(d)) {
381
0
      const auto vx = Load(d, row_x + x);
382
0
      const auto vy = Load(d, row_y + x);
383
0
      const auto scaler =
384
0
          MulAdd(Div(ywv, MulAdd(vy, vy, ywv)), one_minus_s, sv);
385
0
      Store(Mul(scaler, vx), d, row_x + x);
386
0
    }
387
0
  }
388
0
  return true;
389
0
}
Unexecuted instantiation: jxl::N_SSE4::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::SuppressXByY(jxl::Plane<float> const&, jxl::Plane<float>*)
390
391
0
void Subtract(const ImageF& a, const ImageF& b, ImageF* c) {
392
0
  const HWY_FULL(float) d;
393
0
  for (size_t y = 0; y < a.ysize(); ++y) {
394
0
    const float* row_a = a.ConstRow(y);
395
0
    const float* row_b = b.ConstRow(y);
396
0
    float* row_c = c->Row(y);
397
0
    for (size_t x = 0; x < a.xsize(); x += Lanes(d)) {
398
0
      Store(Sub(Load(d, row_a + x), Load(d, row_b + x)), d, row_c + x);
399
0
    }
400
0
  }
401
0
}
Unexecuted instantiation: jxl::N_SSE4::Subtract(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::Subtract(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::Subtract(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*)
402
403
Status SeparateLFAndMF(const ButteraugliParams& params, const Image3F& xyb,
404
0
                       Image3F* lf, Image3F* mf, BlurTemp* blur_temp) {
405
0
  static const double kSigmaLf = 7.15593339443;
406
0
  for (int i = 0; i < 3; ++i) {
407
    // Extract lf ...
408
0
    JXL_RETURN_IF_ERROR(
409
0
        Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &lf->Plane(i)));
410
    // ... and keep everything else in mf.
411
0
    Subtract(xyb.Plane(i), lf->Plane(i), &mf->Plane(i));
412
0
  }
413
0
  XybLowFreqToVals(lf);
414
0
  return true;
415
0
}
Unexecuted instantiation: jxl::N_SSE4::SeparateLFAndMF(jxl::ButteraugliParams const&, jxl::Image3<float> const&, jxl::Image3<float>*, jxl::Image3<float>*, jxl::BlurTemp*)
Unexecuted instantiation: jxl::N_AVX2::SeparateLFAndMF(jxl::ButteraugliParams const&, jxl::Image3<float> const&, jxl::Image3<float>*, jxl::Image3<float>*, jxl::BlurTemp*)
Unexecuted instantiation: jxl::N_SSE2::SeparateLFAndMF(jxl::ButteraugliParams const&, jxl::Image3<float> const&, jxl::Image3<float>*, jxl::Image3<float>*, jxl::BlurTemp*)
416
417
Status SeparateMFAndHF(const ButteraugliParams& params, Image3F* mf, ImageF* hf,
418
0
                       BlurTemp* blur_temp) {
419
0
  const HWY_FULL(float) d;
420
0
  static const double kSigmaHf = 3.22489901262;
421
0
  const size_t xsize = mf->xsize();
422
0
  const size_t ysize = mf->ysize();
423
0
  JxlMemoryManager* memory_manager = mf[0].memory_manager();
424
0
  JXL_ASSIGN_OR_RETURN(hf[0], ImageF::Create(memory_manager, xsize, ysize));
425
0
  JXL_ASSIGN_OR_RETURN(hf[1], ImageF::Create(memory_manager, xsize, ysize));
426
0
  for (int i = 0; i < 3; ++i) {
427
0
    if (i == 2) {
428
0
      JXL_RETURN_IF_ERROR(
429
0
          Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i)));
430
0
      break;
431
0
    }
432
0
    for (size_t y = 0; y < ysize; ++y) {
433
0
      float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(i, y);
434
0
      float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y);
435
0
      for (size_t x = 0; x < xsize; x += Lanes(d)) {
436
0
        Store(Load(d, row_mf + x), d, row_hf + x);
437
0
      }
438
0
    }
439
0
    JXL_RETURN_IF_ERROR(
440
0
        Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i)));
441
0
    static const double kRemoveMfRange = 0.29;
442
0
    static const double kAddMfRange = 0.1;
443
0
    if (i == 0) {
444
0
      for (size_t y = 0; y < ysize; ++y) {
445
0
        float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(0, y);
446
0
        float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y);
447
0
        for (size_t x = 0; x < xsize; x += Lanes(d)) {
448
0
          auto mfv = Load(d, row_mf + x);
449
0
          auto hfv = Sub(Load(d, row_hf + x), mfv);
450
0
          mfv = RemoveRangeAroundZero(d, kRemoveMfRange, mfv);
451
0
          Store(mfv, d, row_mf + x);
452
0
          Store(hfv, d, row_hf + x);
453
0
        }
454
0
      }
455
0
    } else {
456
0
      for (size_t y = 0; y < ysize; ++y) {
457
0
        float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(1, y);
458
0
        float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y);
459
0
        for (size_t x = 0; x < xsize; x += Lanes(d)) {
460
0
          auto mfv = Load(d, row_mf + x);
461
0
          auto hfv = Sub(Load(d, row_hf + x), mfv);
462
463
0
          mfv = AmplifyRangeAroundZero(d, kAddMfRange, mfv);
464
0
          Store(mfv, d, row_mf + x);
465
0
          Store(hfv, d, row_hf + x);
466
0
        }
467
0
      }
468
0
    }
469
0
  }
470
  // Suppress red-green by intensity change in the high freq channels.
471
0
  JXL_RETURN_IF_ERROR(SuppressXByY(hf[1], &hf[0]));
472
0
  return true;
473
0
}
Unexecuted instantiation: jxl::N_SSE4::SeparateMFAndHF(jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::Plane<float>*, jxl::BlurTemp*)
Unexecuted instantiation: jxl::N_AVX2::SeparateMFAndHF(jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::Plane<float>*, jxl::BlurTemp*)
Unexecuted instantiation: jxl::N_SSE2::SeparateMFAndHF(jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::Plane<float>*, jxl::BlurTemp*)
474
475
Status SeparateHFAndUHF(const ButteraugliParams& params, ImageF* hf,
476
0
                        ImageF* uhf, BlurTemp* blur_temp) {
477
0
  const HWY_FULL(float) d;
478
0
  const size_t xsize = hf[0].xsize();
479
0
  const size_t ysize = hf[0].ysize();
480
0
  JxlMemoryManager* memory_manager = hf[0].memory_manager();
481
0
  static const double kSigmaUhf = 1.56416327805;
482
0
  JXL_ASSIGN_OR_RETURN(uhf[0], ImageF::Create(memory_manager, xsize, ysize));
483
0
  JXL_ASSIGN_OR_RETURN(uhf[1], ImageF::Create(memory_manager, xsize, ysize));
484
0
  for (int i = 0; i < 2; ++i) {
485
    // Divide hf into hf and uhf.
486
0
    for (size_t y = 0; y < ysize; ++y) {
487
0
      float* BUTTERAUGLI_RESTRICT row_uhf = uhf[i].Row(y);
488
0
      float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y);
489
0
      for (size_t x = 0; x < xsize; ++x) {
490
0
        row_uhf[x] = row_hf[x];
491
0
      }
492
0
    }
493
0
    JXL_RETURN_IF_ERROR(Blur(hf[i], kSigmaUhf, params, blur_temp, &hf[i]));
494
0
    static const double kRemoveHfRange = 1.5;
495
0
    static const double kAddHfRange = 0.132;
496
0
    static const double kRemoveUhfRange = 0.04;
497
0
    static const double kMaxclampHf = 28.4691806922;
498
0
    static const double kMaxclampUhf = 5.19175294647;
499
0
    static double kMulYHf = 2.155;
500
0
    static double kMulYUhf = 2.69313763794;
501
0
    if (i == 0) {
502
0
      for (size_t y = 0; y < ysize; ++y) {
503
0
        float* BUTTERAUGLI_RESTRICT row_uhf = uhf[0].Row(y);
504
0
        float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y);
505
0
        for (size_t x = 0; x < xsize; x += Lanes(d)) {
506
0
          auto hfv = Load(d, row_hf + x);
507
0
          auto uhfv = Sub(Load(d, row_uhf + x), hfv);
508
0
          hfv = RemoveRangeAroundZero(d, kRemoveHfRange, hfv);
509
0
          uhfv = RemoveRangeAroundZero(d, kRemoveUhfRange, uhfv);
510
0
          Store(hfv, d, row_hf + x);
511
0
          Store(uhfv, d, row_uhf + x);
512
0
        }
513
0
      }
514
0
    } else {
515
0
      for (size_t y = 0; y < ysize; ++y) {
516
0
        float* BUTTERAUGLI_RESTRICT row_uhf = uhf[1].Row(y);
517
0
        float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y);
518
0
        for (size_t x = 0; x < xsize; x += Lanes(d)) {
519
0
          auto hfv = Load(d, row_hf + x);
520
0
          hfv = MaximumClamp(d, hfv, kMaxclampHf);
521
522
0
          auto uhfv = Sub(Load(d, row_uhf + x), hfv);
523
0
          uhfv = MaximumClamp(d, uhfv, kMaxclampUhf);
524
0
          uhfv = Mul(uhfv, Set(d, kMulYUhf));
525
0
          Store(uhfv, d, row_uhf + x);
526
527
0
          hfv = Mul(hfv, Set(d, kMulYHf));
528
0
          hfv = AmplifyRangeAroundZero(d, kAddHfRange, hfv);
529
0
          Store(hfv, d, row_hf + x);
530
0
        }
531
0
      }
532
0
    }
533
0
  }
534
0
  return true;
535
0
}
Unexecuted instantiation: jxl::N_SSE4::SeparateHFAndUHF(jxl::ButteraugliParams const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::BlurTemp*)
Unexecuted instantiation: jxl::N_AVX2::SeparateHFAndUHF(jxl::ButteraugliParams const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::BlurTemp*)
Unexecuted instantiation: jxl::N_SSE2::SeparateHFAndUHF(jxl::ButteraugliParams const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::BlurTemp*)
536
537
0
void DeallocateHFAndUHF(ImageF* hf, ImageF* uhf) {
538
0
  for (int i = 0; i < 2; ++i) {
539
0
    hf[i] = ImageF();
540
0
    uhf[i] = ImageF();
541
0
  }
542
0
}
Unexecuted instantiation: jxl::N_SSE4::DeallocateHFAndUHF(jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::DeallocateHFAndUHF(jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::DeallocateHFAndUHF(jxl::Plane<float>*, jxl::Plane<float>*)
543
544
Status SeparateFrequencies(size_t xsize, size_t ysize,
545
                           const ButteraugliParams& params, BlurTemp* blur_temp,
546
0
                           const Image3F& xyb, PsychoImage& ps) {
547
0
  JxlMemoryManager* memory_manager = xyb.memory_manager();
548
0
  JXL_ASSIGN_OR_RETURN(
549
0
      ps.lf, Image3F::Create(memory_manager, xyb.xsize(), xyb.ysize()));
550
0
  JXL_ASSIGN_OR_RETURN(
551
0
      ps.mf, Image3F::Create(memory_manager, xyb.xsize(), xyb.ysize()));
552
0
  JXL_RETURN_IF_ERROR(SeparateLFAndMF(params, xyb, &ps.lf, &ps.mf, blur_temp));
553
0
  JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &ps.mf, &ps.hf[0], blur_temp));
554
0
  JXL_RETURN_IF_ERROR(
555
0
      SeparateHFAndUHF(params, &ps.hf[0], &ps.uhf[0], blur_temp));
556
0
  return true;
557
0
}
Unexecuted instantiation: jxl::N_SSE4::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&)
Unexecuted instantiation: jxl::N_AVX2::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&)
Unexecuted instantiation: jxl::N_SSE2::SeparateFrequencies(unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Image3<float> const&, jxl::PsychoImage&)
558
559
namespace {
560
template <typename V>
561
0
BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d) {
562
0
  return Add(Add(a, b), Add(c, d));
563
0
}
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 1ul> >(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>)
564
template <typename V>
565
0
BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e) {
566
0
  return Sum(a, b, c, Add(d, e));
567
0
}
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 1ul> >(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>)
568
template <typename V>
569
0
BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g) {
570
0
  return Sum(a, b, c, Sum(d, e, f, g));
571
0
}
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 1ul> >(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>)
572
template <typename V>
573
0
BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g, V h, V i) {
574
0
  return Add(Add(Sum(a, b, c, d), Sum(e, f, g, h)), i);
575
0
}
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 1ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 1ul> >(hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>, hwy::N_SSE4::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::(anonymous namespace)::Sum<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec128<float, 1ul> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec128<float, 1ul> >(hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>, hwy::N_AVX2::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_AVX2::Vec256<float> jxl::N_AVX2::(anonymous namespace)::Sum<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 1ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 1ul> >(hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>, hwy::N_SSE2::Vec128<float, 1ul>)
Unexecuted instantiation: butteraugli.cc:hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::(anonymous namespace)::Sum<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>)
576
}  // namespace
577
578
template <class D>
579
Vec<D> MaltaUnit(MaltaTagLF /*tag*/, const D df,
580
0
                 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) {
581
0
  const intptr_t xs3 = 3 * xs;
582
583
0
  const auto center = LoadU(df, d);
584
585
  // x grows, y constant
586
0
  const auto sum_yconst = Sum(LoadU(df, d - 4), LoadU(df, d - 2), center,
587
0
                              LoadU(df, d + 2), LoadU(df, d + 4));
588
  // Will return this, sum of all line kernels
589
0
  auto retval = Mul(sum_yconst, sum_yconst);
590
0
  {
591
    // y grows, x constant
592
0
    auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs - xs), center,
593
0
                   LoadU(df, d + xs + xs), LoadU(df, d + xs3 + xs));
594
0
    retval = MulAdd(sum, sum, retval);
595
0
  }
596
0
  {
597
    // both grow
598
0
    auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), center,
599
0
                   LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3));
600
0
    retval = MulAdd(sum, sum, retval);
601
0
  }
602
0
  {
603
    // y grows, x shrinks
604
0
    auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), center,
605
0
                   LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3));
606
0
    retval = MulAdd(sum, sum, retval);
607
0
  }
608
0
  {
609
    // y grows -4 to 4, x shrinks 1 -> -1
610
0
    auto sum =
611
0
        Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs - xs + 1), center,
612
0
            LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 1));
613
0
    retval = MulAdd(sum, sum, retval);
614
0
  }
615
0
  {
616
    //  y grows -4 to 4, x grows -1 -> 1
617
0
    auto sum =
618
0
        Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs - xs - 1), center,
619
0
            LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 1));
620
0
    retval = MulAdd(sum, sum, retval);
621
0
  }
622
0
  {
623
    // x grows -4 to 4, y grows -1 to 1
624
0
    auto sum = Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 2 - xs), center,
625
0
                   LoadU(df, d + 2 + xs), LoadU(df, d + 4 + xs));
626
0
    retval = MulAdd(sum, sum, retval);
627
0
  }
628
0
  {
629
    // x grows -4 to 4, y shrinks 1 to -1
630
0
    auto sum = Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 2 + xs), center,
631
0
                   LoadU(df, d + 2 - xs), LoadU(df, d + 4 - xs));
632
0
    retval = MulAdd(sum, sum, retval);
633
0
  }
634
0
  {
635
    /* 0_________
636
       1__*______
637
       2___*_____
638
       3_________
639
       4____0____
640
       5_________
641
       6_____*___
642
       7______*__
643
       8_________ */
644
0
    auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), center,
645
0
                   LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2));
646
0
    retval = MulAdd(sum, sum, retval);
647
0
  }
648
0
  {
649
    /* 0_________
650
       1______*__
651
       2_____*___
652
       3_________
653
       4____0____
654
       5_________
655
       6___*_____
656
       7__*______
657
       8_________ */
658
0
    auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), center,
659
0
                   LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2));
660
0
    retval = MulAdd(sum, sum, retval);
661
0
  }
662
0
  {
663
    /* 0_________
664
       1_________
665
       2_*_______
666
       3__*______
667
       4____0____
668
       5______*__
669
       6_______*_
670
       7_________
671
       8_________ */
672
0
    auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), center,
673
0
                   LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3));
674
0
    retval = MulAdd(sum, sum, retval);
675
0
  }
676
0
  {
677
    /* 0_________
678
       1_________
679
       2_______*_
680
       3______*__
681
       4____0____
682
       5__*______
683
       6_*_______
684
       7_________
685
       8_________ */
686
0
    auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), center,
687
0
                   LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3));
688
0
    retval = MulAdd(sum, sum, retval);
689
0
  }
690
0
  {
691
    /* 0_________
692
       1_________
693
       2________*
694
       3______*__
695
       4____0____
696
       5__*______
697
       6*________
698
       7_________
699
       8_________ */
700
701
0
    auto sum = Sum(LoadU(df, d + xs + xs - 4), LoadU(df, d + xs - 2), center,
702
0
                   LoadU(df, d - xs + 2), LoadU(df, d - xs - xs + 4));
703
0
    retval = MulAdd(sum, sum, retval);
704
0
  }
705
0
  {
706
    /* 0_________
707
       1_________
708
       2*________
709
       3__*______
710
       4____0____
711
       5______*__
712
       6________*
713
       7_________
714
       8_________ */
715
0
    auto sum = Sum(LoadU(df, d - xs - xs - 4), LoadU(df, d - xs - 2), center,
716
0
                   LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 4));
717
0
    retval = MulAdd(sum, sum, retval);
718
0
  }
719
0
  {
720
    /* 0__*______
721
       1_________
722
       2___*_____
723
       3_________
724
       4____0____
725
       5_________
726
       6_____*___
727
       7_________
728
       8______*__ */
729
0
    auto sum =
730
0
        Sum(LoadU(df, d - xs3 - xs - 2), LoadU(df, d - xs - xs - 1), center,
731
0
            LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 2));
732
0
    retval = MulAdd(sum, sum, retval);
733
0
  }
734
0
  {
735
    /* 0______*__
736
       1_________
737
       2_____*___
738
       3_________
739
       4____0____
740
       5_________
741
       6___*_____
742
       7_________
743
       8__*______ */
744
0
    auto sum =
745
0
        Sum(LoadU(df, d - xs3 - xs + 2), LoadU(df, d - xs - xs + 1), center,
746
0
            LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 2));
747
0
    retval = MulAdd(sum, sum, retval);
748
0
  }
749
0
  return retval;
750
0
}
Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE4::Simd<float, 1ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 4ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE4::Simd<float, 4ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_AVX2::Simd<float, 1ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 8ul, 0> >(jxl::MaltaTagLF, hwy::N_AVX2::Simd<float, 8ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::MaltaUnit<hwy::N_SSE2::Simd<float, 1ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE2::Simd<float, 1ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::MaltaUnit<hwy::N_SSE2::Simd<float, 4ul, 0> >(jxl::MaltaTagLF, hwy::N_SSE2::Simd<float, 4ul, 0>, float const*, long)
751
752
template <class D>
753
Vec<D> MaltaUnit(MaltaTag /*tag*/, const D df,
754
0
                 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) {
755
0
  const intptr_t xs3 = 3 * xs;
756
757
0
  const auto center = LoadU(df, d);
758
759
  // x grows, y constant
760
0
  const auto sum_yconst =
761
0
      Sum(LoadU(df, d - 4), LoadU(df, d - 3), LoadU(df, d - 2),
762
0
          LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2),
763
0
          LoadU(df, d + 3), LoadU(df, d + 4));
764
  // Will return this, sum of all line kernels
765
0
  auto retval = Mul(sum_yconst, sum_yconst);
766
767
0
  {
768
    // y grows, x constant
769
0
    auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs3),
770
0
                   LoadU(df, d - xs - xs), LoadU(df, d - xs), center,
771
0
                   LoadU(df, d + xs), LoadU(df, d + xs + xs),
772
0
                   LoadU(df, d + xs3), LoadU(df, d + xs3 + xs));
773
0
    retval = MulAdd(sum, sum, retval);
774
0
  }
775
0
  {
776
    // both grow
777
0
    auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2),
778
0
                   LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1),
779
0
                   LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3));
780
0
    retval = MulAdd(sum, sum, retval);
781
0
  }
782
0
  {
783
    // y grows, x shrinks
784
0
    auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2),
785
0
                   LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1),
786
0
                   LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3));
787
0
    retval = MulAdd(sum, sum, retval);
788
0
  }
789
0
  {
790
    // y grows -4 to 4, x shrinks 1 -> -1
791
0
    auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1),
792
0
                   LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center,
793
0
                   LoadU(df, d + xs), LoadU(df, d + xs + xs - 1),
794
0
                   LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1));
795
0
    retval = MulAdd(sum, sum, retval);
796
0
  }
797
0
  {
798
    //  y grows -4 to 4, x grows -1 -> 1
799
0
    auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1),
800
0
                   LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center,
801
0
                   LoadU(df, d + xs), LoadU(df, d + xs + xs + 1),
802
0
                   LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1));
803
0
    retval = MulAdd(sum, sum, retval);
804
0
  }
805
0
  {
806
    // x grows -4 to 4, y grows -1 to 1
807
0
    auto sum =
808
0
        Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 3 - xs), LoadU(df, d - 2 - xs),
809
0
            LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 + xs),
810
0
            LoadU(df, d + 3 + xs), LoadU(df, d + 4 + xs));
811
0
    retval = MulAdd(sum, sum, retval);
812
0
  }
813
0
  {
814
    // x grows -4 to 4, y shrinks 1 to -1
815
0
    auto sum =
816
0
        Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 3 + xs), LoadU(df, d - 2 + xs),
817
0
            LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 - xs),
818
0
            LoadU(df, d + 3 - xs), LoadU(df, d + 4 - xs));
819
0
    retval = MulAdd(sum, sum, retval);
820
0
  }
821
0
  {
822
    /* 0_________
823
       1__*______
824
       2___*_____
825
       3___*_____
826
       4____0____
827
       5_____*___
828
       6_____*___
829
       7______*__
830
       8_________ */
831
0
    auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1),
832
0
                   LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1),
833
0
                   LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2));
834
0
    retval = MulAdd(sum, sum, retval);
835
0
  }
836
0
  {
837
    /* 0_________
838
       1______*__
839
       2_____*___
840
       3_____*___
841
       4____0____
842
       5___*_____
843
       6___*_____
844
       7__*______
845
       8_________ */
846
0
    auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1),
847
0
                   LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1),
848
0
                   LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2));
849
0
    retval = MulAdd(sum, sum, retval);
850
0
  }
851
0
  {
852
    /* 0_________
853
       1_________
854
       2_*_______
855
       3__**_____
856
       4____0____
857
       5_____**__
858
       6_______*_
859
       7_________
860
       8_________ */
861
0
    auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2),
862
0
                   LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1),
863
0
                   LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3));
864
0
    retval = MulAdd(sum, sum, retval);
865
0
  }
866
0
  {
867
    /* 0_________
868
       1_________
869
       2_______*_
870
       3_____**__
871
       4____0____
872
       5__**_____
873
       6_*_______
874
       7_________
875
       8_________ */
876
0
    auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2),
877
0
                   LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1),
878
0
                   LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3));
879
0
    retval = MulAdd(sum, sum, retval);
880
0
  }
881
0
  {
882
    /* 0_________
883
       1_________
884
       2_________
885
       3______***
886
       4___*0*___
887
       5***______
888
       6_________
889
       7_________
890
       8_________ */
891
892
0
    auto sum =
893
0
        Sum(LoadU(df, d + xs - 4), LoadU(df, d + xs - 3), LoadU(df, d + xs - 2),
894
0
            LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d - xs + 2),
895
0
            LoadU(df, d - xs + 3), LoadU(df, d - xs + 4));
896
0
    retval = MulAdd(sum, sum, retval);
897
0
  }
898
0
  {
899
    /* 0_________
900
       1_________
901
       2_________
902
       3***______
903
       4___*0*___
904
       5______***
905
       6_________
906
       7_________
907
       8_________ */
908
0
    auto sum =
909
0
        Sum(LoadU(df, d - xs - 4), LoadU(df, d - xs - 3), LoadU(df, d - xs - 2),
910
0
            LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + xs + 2),
911
0
            LoadU(df, d + xs + 3), LoadU(df, d + xs + 4));
912
0
    retval = MulAdd(sum, sum, retval);
913
0
  }
914
0
  {
915
    /* 0___*_____
916
       1___*_____
917
       2___*_____
918
       3____*____
919
       4____0____
920
       5____*____
921
       6_____*___
922
       7_____*___
923
       8_____*___ */
924
0
    auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1),
925
0
                   LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center,
926
0
                   LoadU(df, d + xs), LoadU(df, d + xs + xs + 1),
927
0
                   LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1));
928
0
    retval = MulAdd(sum, sum, retval);
929
0
  }
930
0
  {
931
    /* 0_____*___
932
       1_____*___
933
       2____ *___
934
       3____*____
935
       4____0____
936
       5____*____
937
       6___*_____
938
       7___*_____
939
       8___*_____ */
940
0
    auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1),
941
0
                   LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center,
942
0
                   LoadU(df, d + xs), LoadU(df, d + xs + xs - 1),
943
0
                   LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1));
944
0
    retval = MulAdd(sum, sum, retval);
945
0
  }
946
0
  return retval;
947
0
}
Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 1ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_SSE4::Simd<float, 1ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_SSE4::Simd<float, 4ul, 0>)())) jxl::N_SSE4::MaltaUnit<hwy::N_SSE4::Simd<float, 4ul, 0> >(jxl::MaltaTag, hwy::N_SSE4::Simd<float, 4ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 1ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_AVX2::Simd<float, 1ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_AVX2::Simd<float, 8ul, 0>)())) jxl::N_AVX2::MaltaUnit<hwy::N_AVX2::Simd<float, 8ul, 0> >(jxl::MaltaTag, hwy::N_AVX2::Simd<float, 8ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_SSE2::Simd<float, 1ul, 0>)())) jxl::N_SSE2::MaltaUnit<hwy::N_SSE2::Simd<float, 1ul, 0> >(jxl::MaltaTag, hwy::N_SSE2::Simd<float, 1ul, 0>, float const*, long)
Unexecuted instantiation: decltype (Zero((hwy::N_SSE2::Simd<float, 4ul, 0>)())) jxl::N_SSE2::MaltaUnit<hwy::N_SSE2::Simd<float, 4ul, 0> >(jxl::MaltaTag, hwy::N_SSE2::Simd<float, 4ul, 0>, float const*, long)
948
949
// Returns MaltaUnit. Avoids bounds-checks when x0 and y0 are known
950
// to be far enough from the image borders. "diffs" is a packed image.
951
template <class Tag>
952
static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs,
953
                                                const size_t x0,
954
0
                                                const size_t y0) {
955
0
  const float* BUTTERAUGLI_RESTRICT d = diffs.ConstRow(y0) + x0;
956
0
  const HWY_CAPPED(float, 1) df;
957
0
  if ((x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) &&
958
0
       y0 < (diffs.ysize() - 4))) {
959
0
    return GetLane(MaltaUnit(Tag(), df, d, diffs.PixelsPerRow()));
960
0
  }
961
962
0
  float borderimage[12 * 9];  // round up to 4
963
0
  for (int dy = 0; dy < 9; ++dy) {
964
0
    int y = y0 + dy - 4;
965
0
    if (y < 0 || static_cast<size_t>(y) >= diffs.ysize()) {
966
0
      for (int dx = 0; dx < 12; ++dx) {
967
0
        borderimage[dy * 12 + dx] = 0.0f;
968
0
      }
969
0
      continue;
970
0
    }
971
972
0
    const float* row_diffs = diffs.ConstRow(y);
973
0
    for (int dx = 0; dx < 9; ++dx) {
974
0
      int x = x0 + dx - 4;
975
0
      if (x < 0 || static_cast<size_t>(x) >= diffs.xsize()) {
976
0
        borderimage[dy * 12 + dx] = 0.0f;
977
0
      } else {
978
0
        borderimage[dy * 12 + dx] = row_diffs[x];
979
0
      }
980
0
    }
981
0
    std::fill(borderimage + dy * 12 + 9, borderimage + dy * 12 + 12, 0.0f);
982
0
  }
983
0
  return GetLane(MaltaUnit(Tag(), df, &borderimage[4 * 12 + 4], 12));
984
0
}
Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE4::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long)
Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE4::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long)
Unexecuted instantiation: butteraugli.cc:float jxl::N_AVX2::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long)
Unexecuted instantiation: butteraugli.cc:float jxl::N_AVX2::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long)
Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE2::PaddedMaltaUnit<jxl::MaltaTag>(jxl::Plane<float> const&, unsigned long, unsigned long)
Unexecuted instantiation: butteraugli.cc:float jxl::N_SSE2::PaddedMaltaUnit<jxl::MaltaTagLF>(jxl::Plane<float> const&, unsigned long, unsigned long)
985
986
template <class Tag>
987
static Status MaltaDiffMapT(const Tag tag, const ImageF& lum0,
988
                            const ImageF& lum1, const double w_0gt1,
989
                            const double w_0lt1, const double norm1,
990
                            const double len, const double mulli,
991
                            ImageF* HWY_RESTRICT diffs,
992
0
                            ImageF* HWY_RESTRICT block_diff_ac) {
993
0
  JXL_ENSURE(SameSize(lum0, lum1) && SameSize(lum0, *diffs));
994
0
  const size_t xsize_ = lum0.xsize();
995
0
  const size_t ysize_ = lum0.ysize();
996
997
0
  const float kWeight0 = 0.5;
998
0
  const float kWeight1 = 0.33;
999
1000
0
  const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1);
1001
0
  const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1);
1002
0
  const float norm2_0gt1 = w_pre0gt1 * norm1;
1003
0
  const float norm2_0lt1 = w_pre0lt1 * norm1;
1004
1005
0
  for (size_t y = 0; y < ysize_; ++y) {
1006
0
    const float* HWY_RESTRICT row0 = lum0.ConstRow(y);
1007
0
    const float* HWY_RESTRICT row1 = lum1.ConstRow(y);
1008
0
    float* HWY_RESTRICT row_diffs = diffs->Row(y);
1009
0
    for (size_t x = 0; x < xsize_; ++x) {
1010
0
      const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x]));
1011
0
      const float diff = row0[x] - row1[x];
1012
0
      const float scaler = norm2_0gt1 / (static_cast<float>(norm1) + absval);
1013
1014
      // Primary symmetric quadratic objective.
1015
0
      row_diffs[x] = scaler * diff;
1016
1017
0
      const float scaler2 = norm2_0lt1 / (static_cast<float>(norm1) + absval);
1018
0
      const double fabs0 = std::fabs(row0[x]);
1019
1020
      // Secondary half-open quadratic objectives.
1021
0
      const double too_small = 0.55 * fabs0;
1022
0
      const double too_big = 1.05 * fabs0;
1023
1024
0
      if (row0[x] < 0) {
1025
0
        if (row1[x] > -too_small) {
1026
0
          double impact = scaler2 * (row1[x] + too_small);
1027
0
          row_diffs[x] -= impact;
1028
0
        } else if (row1[x] < -too_big) {
1029
0
          double impact = scaler2 * (-row1[x] - too_big);
1030
0
          row_diffs[x] += impact;
1031
0
        }
1032
0
      } else {
1033
0
        if (row1[x] < too_small) {
1034
0
          double impact = scaler2 * (too_small - row1[x]);
1035
0
          row_diffs[x] += impact;
1036
0
        } else if (row1[x] > too_big) {
1037
0
          double impact = scaler2 * (row1[x] - too_big);
1038
0
          row_diffs[x] -= impact;
1039
0
        }
1040
0
      }
1041
0
    }
1042
0
  }
1043
1044
0
  size_t y0 = 0;
1045
  // Top
1046
0
  for (; y0 < 4; ++y0) {
1047
0
    float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0);
1048
0
    for (size_t x0 = 0; x0 < xsize_; ++x0) {
1049
0
      row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
1050
0
    }
1051
0
  }
1052
1053
0
  const HWY_FULL(float) df;
1054
0
  const size_t aligned_x = std::max(static_cast<size_t>(4), Lanes(df));
1055
0
  const intptr_t stride = diffs->PixelsPerRow();
1056
1057
  // Middle
1058
0
  for (; y0 < ysize_ - 4; ++y0) {
1059
0
    const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y0);
1060
0
    float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0);
1061
0
    size_t x0 = 0;
1062
0
    for (; x0 < aligned_x; ++x0) {
1063
0
      row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
1064
0
    }
1065
0
    for (; x0 + Lanes(df) + 4 <= xsize_; x0 += Lanes(df)) {
1066
0
      auto diff = Load(df, row_diff + x0);
1067
0
      diff = Add(diff, MaltaUnit(Tag(), df, row_in + x0, stride));
1068
0
      Store(diff, df, row_diff + x0);
1069
0
    }
1070
1071
0
    for (; x0 < xsize_; ++x0) {
1072
0
      row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
1073
0
    }
1074
0
  }
1075
1076
  // Bottom
1077
0
  for (; y0 < ysize_; ++y0) {
1078
0
    float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0);
1079
0
    for (size_t x0 = 0; x0 < xsize_; ++x0) {
1080
0
      row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
1081
0
    }
1082
0
  }
1083
0
  return true;
1084
0
}
Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SSE4::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SSE4::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_AVX2::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_AVX2::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SSE2::MaltaDiffMapT<jxl::MaltaTag>(jxl::MaltaTag, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::Status jxl::N_SSE2::MaltaDiffMapT<jxl::MaltaTagLF>(jxl::MaltaTagLF, jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
1085
1086
// Need non-template wrapper functions for HWY_EXPORT.
1087
Status MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
1088
                    const double w_0lt1, const double norm1,
1089
                    ImageF* HWY_RESTRICT diffs,
1090
0
                    ImageF* HWY_RESTRICT block_diff_ac) {
1091
0
  const double len = 3.75;
1092
0
  static const double mulli = 0.39905817637;
1093
0
  JXL_RETURN_IF_ERROR(MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1,
1094
0
                                    norm1, len, mulli, diffs, block_diff_ac));
1095
0
  return true;
1096
0
}
Unexecuted instantiation: jxl::N_SSE4::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::MaltaDiffMap(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
1097
1098
Status MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1,
1099
                      const double w_0gt1, const double w_0lt1,
1100
                      const double norm1, ImageF* HWY_RESTRICT diffs,
1101
0
                      ImageF* HWY_RESTRICT block_diff_ac) {
1102
0
  const double len = 3.75;
1103
0
  static const double mulli = 0.611612573796;
1104
0
  JXL_RETURN_IF_ERROR(MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1,
1105
0
                                    norm1, len, mulli, diffs, block_diff_ac));
1106
0
  return true;
1107
0
}
Unexecuted instantiation: jxl::N_SSE4::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::MaltaDiffMapLF(jxl::Plane<float> const&, jxl::Plane<float> const&, double, double, double, jxl::Plane<float>*, jxl::Plane<float>*)
1108
1109
void CombineChannelsForMasking(const ImageF* hf, const ImageF* uhf,
1110
0
                               ImageF* out) {
1111
  // Only X and Y components are involved in masking. B's influence
1112
  // is considered less important in the high frequency area, and we
1113
  // don't model masking from lower frequency signals.
1114
0
  static const float muls[3] = {
1115
0
      2.5f,
1116
0
      0.4f,
1117
0
      0.4f,
1118
0
  };
1119
  // Silly and unoptimized approach here. TODO(jyrki): rework this.
1120
0
  for (size_t y = 0; y < hf[0].ysize(); ++y) {
1121
0
    const float* BUTTERAUGLI_RESTRICT row_y_hf = hf[1].Row(y);
1122
0
    const float* BUTTERAUGLI_RESTRICT row_y_uhf = uhf[1].Row(y);
1123
0
    const float* BUTTERAUGLI_RESTRICT row_x_hf = hf[0].Row(y);
1124
0
    const float* BUTTERAUGLI_RESTRICT row_x_uhf = uhf[0].Row(y);
1125
0
    float* BUTTERAUGLI_RESTRICT row = out->Row(y);
1126
0
    for (size_t x = 0; x < hf[0].xsize(); ++x) {
1127
0
      float xdiff = (row_x_uhf[x] + row_x_hf[x]) * muls[0];
1128
0
      float ydiff = row_y_uhf[x] * muls[1] + row_y_hf[x] * muls[2];
1129
0
      row[x] = xdiff * xdiff + ydiff * ydiff;
1130
0
      row[x] = std::sqrt(row[x]);
1131
0
    }
1132
0
  }
1133
0
}
Unexecuted instantiation: jxl::N_SSE4::CombineChannelsForMasking(jxl::Plane<float> const*, jxl::Plane<float> const*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::CombineChannelsForMasking(jxl::Plane<float> const*, jxl::Plane<float> const*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::CombineChannelsForMasking(jxl::Plane<float> const*, jxl::Plane<float> const*, jxl::Plane<float>*)
1134
1135
0
void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) {
1136
0
  const size_t xsize = xyb.xsize();
1137
0
  const size_t ysize = xyb.ysize();
1138
0
  const float bias = mul * bias_arg;
1139
0
  const float sqrt_bias = std::sqrt(bias);
1140
0
  for (size_t y = 0; y < ysize; ++y) {
1141
0
    const float* BUTTERAUGLI_RESTRICT row_in = xyb.Row(y);
1142
0
    float* BUTTERAUGLI_RESTRICT row_out = out->Row(y);
1143
0
    for (size_t x = 0; x < xsize; ++x) {
1144
      // kBias makes sqrt behave more linearly.
1145
0
      row_out[x] = std::sqrt(mul * std::abs(row_in[x]) + bias) - sqrt_bias;
1146
0
    }
1147
0
  }
1148
0
}
Unexecuted instantiation: jxl::N_SSE4::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::DiffPrecompute(jxl::Plane<float> const&, float, float, jxl::Plane<float>*)
1149
1150
// std::log(80.0) / std::log(255.0);
1151
constexpr float kIntensityTargetNormalizationHack = 0.79079917404f;
1152
static const float kInternalGoodQualityThreshold =
1153
    17.83f * kIntensityTargetNormalizationHack;
1154
static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold;
1155
1156
0
void StoreMin3(const float v, float& min0, float& min1, float& min2) {
1157
0
  if (v < min2) {
1158
0
    if (v < min0) {
1159
0
      min2 = min1;
1160
0
      min1 = min0;
1161
0
      min0 = v;
1162
0
    } else if (v < min1) {
1163
0
      min2 = min1;
1164
0
      min1 = v;
1165
0
    } else {
1166
0
      min2 = v;
1167
0
    }
1168
0
  }
1169
0
}
Unexecuted instantiation: jxl::N_SSE4::StoreMin3(float, float&, float&, float&)
Unexecuted instantiation: jxl::N_AVX2::StoreMin3(float, float&, float&, float&)
Unexecuted instantiation: jxl::N_SSE2::StoreMin3(float, float&, float&, float&)
1170
1171
// Look for smooth areas near the area of degradation.
1172
// If the areas area generally smooth, don't do masking.
1173
0
void FuzzyErosion(const ImageF& from, ImageF* to) {
1174
0
  const size_t xsize = from.xsize();
1175
0
  const size_t ysize = from.ysize();
1176
0
  static const int kStep = 3;
1177
0
  for (size_t y = 0; y < ysize; ++y) {
1178
0
    for (size_t x = 0; x < xsize; ++x) {
1179
0
      float min0 = from.Row(y)[x];
1180
0
      float min1 = 2 * min0;
1181
0
      float min2 = min1;
1182
0
      if (x >= kStep) {
1183
0
        StoreMin3(from.Row(y)[x - kStep], min0, min1, min2);
1184
0
        if (y >= kStep) {
1185
0
          StoreMin3(from.Row(y - kStep)[x - kStep], min0, min1, min2);
1186
0
        }
1187
0
        if (y < ysize - kStep) {
1188
0
          StoreMin3(from.Row(y + kStep)[x - kStep], min0, min1, min2);
1189
0
        }
1190
0
      }
1191
0
      if (x < xsize - kStep) {
1192
0
        StoreMin3(from.Row(y)[x + kStep], min0, min1, min2);
1193
0
        if (y >= kStep) {
1194
0
          StoreMin3(from.Row(y - kStep)[x + kStep], min0, min1, min2);
1195
0
        }
1196
0
        if (y < ysize - kStep) {
1197
0
          StoreMin3(from.Row(y + kStep)[x + kStep], min0, min1, min2);
1198
0
        }
1199
0
      }
1200
0
      if (y >= kStep) {
1201
0
        StoreMin3(from.Row(y - kStep)[x], min0, min1, min2);
1202
0
      }
1203
0
      if (y < ysize - kStep) {
1204
0
        StoreMin3(from.Row(y + kStep)[x], min0, min1, min2);
1205
0
      }
1206
0
      to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2);
1207
0
    }
1208
0
  }
1209
0
}
Unexecuted instantiation: jxl::N_SSE4::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::FuzzyErosion(jxl::Plane<float> const&, jxl::Plane<float>*)
1210
1211
// Compute values of local frequency and dc masking based on the activity
1212
// in the two images. img_diff_ac may be null.
1213
Status Mask(const ImageF& mask0, const ImageF& mask1,
1214
            const ButteraugliParams& params, BlurTemp* blur_temp,
1215
            ImageF* BUTTERAUGLI_RESTRICT mask,
1216
0
            ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
1217
0
  const size_t xsize = mask0.xsize();
1218
0
  const size_t ysize = mask0.ysize();
1219
0
  JxlMemoryManager* memory_manager = mask0.memory_manager();
1220
0
  JXL_ASSIGN_OR_RETURN(*mask, ImageF::Create(memory_manager, xsize, ysize));
1221
0
  static const float kMul = 6.19424080439;
1222
0
  static const float kBias = 12.61050594197;
1223
0
  static const float kRadius = 2.7;
1224
0
  JXL_ASSIGN_OR_RETURN(ImageF diff0,
1225
0
                       ImageF::Create(memory_manager, xsize, ysize));
1226
0
  JXL_ASSIGN_OR_RETURN(ImageF diff1,
1227
0
                       ImageF::Create(memory_manager, xsize, ysize));
1228
0
  JXL_ASSIGN_OR_RETURN(ImageF blurred0,
1229
0
                       ImageF::Create(memory_manager, xsize, ysize));
1230
0
  JXL_ASSIGN_OR_RETURN(ImageF blurred1,
1231
0
                       ImageF::Create(memory_manager, xsize, ysize));
1232
0
  DiffPrecompute(mask0, kMul, kBias, &diff0);
1233
0
  DiffPrecompute(mask1, kMul, kBias, &diff1);
1234
0
  JXL_RETURN_IF_ERROR(Blur(diff0, kRadius, params, blur_temp, &blurred0));
1235
0
  FuzzyErosion(blurred0, &diff0);
1236
0
  JXL_RETURN_IF_ERROR(Blur(diff1, kRadius, params, blur_temp, &blurred1));
1237
0
  for (size_t y = 0; y < ysize; ++y) {
1238
0
    for (size_t x = 0; x < xsize; ++x) {
1239
0
      mask->Row(y)[x] = diff0.Row(y)[x];
1240
0
      if (diff_ac != nullptr) {
1241
0
        static const float kMaskToErrorMul = 10.0;
1242
0
        float diff = blurred0.Row(y)[x] - blurred1.Row(y)[x];
1243
0
        diff_ac->Row(y)[x] += kMaskToErrorMul * diff * diff;
1244
0
      }
1245
0
    }
1246
0
  }
1247
0
  return true;
1248
0
}
Unexecuted instantiation: jxl::N_SSE4::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::Mask(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*)
1249
1250
// `diff_ac` may be null.
1251
Status MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1,
1252
                       const size_t xsize, const size_t ysize,
1253
                       const ButteraugliParams& params, BlurTemp* blur_temp,
1254
                       ImageF* BUTTERAUGLI_RESTRICT mask,
1255
0
                       ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
1256
0
  JxlMemoryManager* memory_manager = pi0.hf[0].memory_manager();
1257
0
  JXL_ASSIGN_OR_RETURN(ImageF mask0,
1258
0
                       ImageF::Create(memory_manager, xsize, ysize));
1259
0
  JXL_ASSIGN_OR_RETURN(ImageF mask1,
1260
0
                       ImageF::Create(memory_manager, xsize, ysize));
1261
0
  CombineChannelsForMasking(&pi0.hf[0], &pi0.uhf[0], &mask0);
1262
0
  CombineChannelsForMasking(&pi1.hf[0], &pi1.uhf[0], &mask1);
1263
0
  JXL_RETURN_IF_ERROR(Mask(mask0, mask1, params, blur_temp, mask, diff_ac));
1264
0
  return true;
1265
0
}
Unexecuted instantiation: jxl::N_SSE4::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::MaskPsychoImage(jxl::PsychoImage const&, jxl::PsychoImage const&, unsigned long, unsigned long, jxl::ButteraugliParams const&, jxl::BlurTemp*, jxl::Plane<float>*, jxl::Plane<float>*)
1266
1267
0
double MaskY(double delta) {
1268
0
  static const double offset = 0.829591754942;
1269
0
  static const double scaler = 0.451936922203;
1270
0
  static const double mul = 2.5485944793;
1271
0
  const double c = mul / ((scaler * delta) + offset);
1272
0
  const double retval = kGlobalScale * (1.0 + c);
1273
0
  return retval * retval;
1274
0
}
Unexecuted instantiation: jxl::N_SSE4::MaskY(double)
Unexecuted instantiation: jxl::N_AVX2::MaskY(double)
Unexecuted instantiation: jxl::N_SSE2::MaskY(double)
1275
1276
0
double MaskDcY(double delta) {
1277
0
  static const double offset = 0.20025578522;
1278
0
  static const double scaler = 3.87449418804;
1279
0
  static const double mul = 0.505054525019;
1280
0
  const double c = mul / ((scaler * delta) + offset);
1281
0
  const double retval = kGlobalScale * (1.0 + c);
1282
0
  return retval * retval;
1283
0
}
Unexecuted instantiation: jxl::N_SSE4::MaskDcY(double)
Unexecuted instantiation: jxl::N_AVX2::MaskDcY(double)
Unexecuted instantiation: jxl::N_SSE2::MaskDcY(double)
1284
1285
0
inline float MaskColor(const float color[3], const float mask) {
1286
0
  return color[0] * mask + color[1] * mask + color[2] * mask;
1287
0
}
Unexecuted instantiation: jxl::N_SSE4::MaskColor(float const*, float)
Unexecuted instantiation: jxl::N_AVX2::MaskColor(float const*, float)
Unexecuted instantiation: jxl::N_SSE2::MaskColor(float const*, float)
1288
1289
// Diffmap := sqrt of sum{diff images by multiplied by X and Y/B masks}
1290
Status CombineChannelsToDiffmap(const ImageF& mask,
1291
                                const Image3F& block_diff_dc,
1292
                                const Image3F& block_diff_ac, float xmul,
1293
0
                                ImageF* result) {
1294
0
  JXL_ENSURE(SameSize(mask, *result));
1295
0
  size_t xsize = mask.xsize();
1296
0
  size_t ysize = mask.ysize();
1297
0
  for (size_t y = 0; y < ysize; ++y) {
1298
0
    float* BUTTERAUGLI_RESTRICT row_out = result->Row(y);
1299
0
    for (size_t x = 0; x < xsize; ++x) {
1300
0
      float val = mask.Row(y)[x];
1301
0
      float maskval = MaskY(val);
1302
0
      float dc_maskval = MaskDcY(val);
1303
0
      float diff_dc[3];
1304
0
      float diff_ac[3];
1305
0
      for (int i = 0; i < 3; ++i) {
1306
0
        diff_dc[i] = block_diff_dc.PlaneRow(i, y)[x];
1307
0
        diff_ac[i] = block_diff_ac.PlaneRow(i, y)[x];
1308
0
      }
1309
0
      diff_ac[0] *= xmul;
1310
0
      diff_dc[0] *= xmul;
1311
0
      row_out[x] = std::sqrt(MaskColor(diff_dc, dc_maskval) +
1312
0
                             MaskColor(diff_ac, maskval));
1313
0
    }
1314
0
  }
1315
0
  return true;
1316
0
}
Unexecuted instantiation: jxl::N_SSE4::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_AVX2::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*)
Unexecuted instantiation: jxl::N_SSE2::CombineChannelsToDiffmap(jxl::Plane<float> const&, jxl::Image3<float> const&, jxl::Image3<float> const&, float, jxl::Plane<float>*)
1317
1318
// Adds weighted L2 difference between i0 and i1 to diffmap.
1319
static void L2Diff(const ImageF& i0, const ImageF& i1, const float w,
1320
0
                   ImageF* BUTTERAUGLI_RESTRICT diffmap) {
1321
0
  if (w == 0) return;
1322
1323
0
  const HWY_FULL(float) d;
1324
0
  const auto weight = Set(d, w);
1325
1326
0
  for (size_t y = 0; y < i0.ysize(); ++y) {
1327
0
    const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y);
1328
0
    const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y);
1329
0
    float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y);
1330
1331
0
    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
1332
0
      const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x));
1333
0
      const auto diff2 = Mul(diff, diff);
1334
0
      const auto prev = Load(d, row_diff + x);
1335
0
      Store(MulAdd(diff2, weight, prev), d, row_diff + x);
1336
0
    }
1337
0
  }
1338
0
}
Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::N_SSE2::L2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*)
1339
1340
// Initializes diffmap to the weighted L2 difference between i0 and i1.
1341
static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w,
1342
0
                      ImageF* BUTTERAUGLI_RESTRICT diffmap) {
1343
0
  if (w == 0) return;
1344
1345
0
  const HWY_FULL(float) d;
1346
0
  const auto weight = Set(d, w);
1347
1348
0
  for (size_t y = 0; y < i0.ysize(); ++y) {
1349
0
    const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y);
1350
0
    const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y);
1351
0
    float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y);
1352
1353
0
    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
1354
0
      const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x));
1355
0
      const auto diff2 = Mul(diff, diff);
1356
0
      Store(Mul(diff2, weight), d, row_diff + x);
1357
0
    }
1358
0
  }
1359
0
}
Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::N_SSE2::SetL2Diff(jxl::Plane<float> const&, jxl::Plane<float> const&, float, jxl::Plane<float>*)
1360
1361
// i0 is the original image.
1362
// i1 is the deformed copy.
1363
static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1,
1364
                             float w_0lt1,
1365
0
                             ImageF* BUTTERAUGLI_RESTRICT diffmap) {
1366
0
  if (w_0gt1 == 0 && w_0lt1 == 0) {
1367
0
    return;
1368
0
  }
1369
1370
0
  const HWY_FULL(float) d;
1371
0
  const auto vw_0gt1 = Set(d, w_0gt1 * 0.8);
1372
0
  const auto vw_0lt1 = Set(d, w_0lt1 * 0.8);
1373
1374
0
  for (size_t y = 0; y < i0.ysize(); ++y) {
1375
0
    const float* BUTTERAUGLI_RESTRICT row0 = i0.Row(y);
1376
0
    const float* BUTTERAUGLI_RESTRICT row1 = i1.Row(y);
1377
0
    float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y);
1378
1379
0
    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
1380
0
      const auto val0 = Load(d, row0 + x);
1381
0
      const auto val1 = Load(d, row1 + x);
1382
1383
      // Primary symmetric quadratic objective.
1384
0
      const auto diff = Sub(val0, val1);
1385
0
      auto total = MulAdd(Mul(diff, diff), vw_0gt1, Load(d, row_diff + x));
1386
1387
      // Secondary half-open quadratic objectives.
1388
0
      const auto fabs0 = Abs(val0);
1389
0
      const auto too_small = Mul(Set(d, 0.4), fabs0);
1390
0
      const auto too_big = fabs0;
1391
1392
0
      const auto if_neg = IfThenElse(
1393
0
          Gt(val1, Neg(too_small)), Add(val1, too_small),
1394
0
          IfThenElseZero(Lt(val1, Neg(too_big)), Sub(Neg(val1), too_big)));
1395
0
      const auto if_pos =
1396
0
          IfThenElse(Lt(val1, too_small), Sub(too_small, val1),
1397
0
                     IfThenElseZero(Gt(val1, too_big), Sub(val1, too_big)));
1398
0
      const auto v = IfThenElse(Lt(val0, Zero(d)), if_neg, if_pos);
1399
0
      total = MulAdd(vw_0lt1, Mul(v, v), total);
1400
0
      Store(total, d, row_diff + x);
1401
0
    }
1402
0
  }
1403
0
}
Unexecuted instantiation: butteraugli.cc:jxl::N_SSE4::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::N_AVX2::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Plane<float>*)
Unexecuted instantiation: butteraugli.cc:jxl::N_SSE2::L2DiffAsymmetric(jxl::Plane<float> const&, jxl::Plane<float> const&, float, float, jxl::Plane<float>*)
1404
1405
// A simple HDR compatible gamma function.
1406
template <class DF, class V>
1407
0
V Gamma(const DF df, V v) {
1408
  // ln(2) constant folded in because we want std::log but have FastLog2f.
1409
0
  const auto kRetMul = Set(df, 19.245013259874995f * kInvLog2e);
1410
0
  const auto kRetAdd = Set(df, -23.16046239805755);
1411
  // This should happen rarely, but may lead to a NaN in log, which is
1412
  // undesirable. Since negative photons don't exist we solve the NaNs by
1413
  // clamping here.
1414
0
  v = ZeroIfNegative(v);
1415
1416
0
  const auto biased = Add(v, Set(df, 9.9710635769299145));
1417
0
  const auto log = FastLog2f(df, biased);
1418
  // We could fold this into a custom Log2 polynomial, but there would be
1419
  // relatively little gain.
1420
0
  return MulAdd(kRetMul, log, kRetAdd);
1421
0
}
Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::Gamma<hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul>)
Unexecuted instantiation: hwy::N_AVX2::Vec256<float> jxl::N_AVX2::Gamma<hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float>)
Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::Gamma<hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul>)
1422
1423
template <bool Clamp, class DF, class V>
1424
BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1,
1425
                                        const V& in2, V* JXL_RESTRICT out0,
1426
                                        V* JXL_RESTRICT out1,
1427
0
                                        V* JXL_RESTRICT out2) {
1428
  // https://en.wikipedia.org/wiki/Photopsin absorbance modeling.
1429
0
  static const double mixi0 = 0.29956550340058319;
1430
0
  static const double mixi1 = 0.63373087833825936;
1431
0
  static const double mixi2 = 0.077705617820981968;
1432
0
  static const double mixi3 = 1.7557483643287353;
1433
0
  static const double mixi4 = 0.22158691104574774;
1434
0
  static const double mixi5 = 0.69391388044116142;
1435
0
  static const double mixi6 = 0.0987313588422;
1436
0
  static const double mixi7 = 1.7557483643287353;
1437
0
  static const double mixi8 = 0.02;
1438
0
  static const double mixi9 = 0.02;
1439
0
  static const double mixi10 = 0.20480129041026129;
1440
0
  static const double mixi11 = 12.226454707163354;
1441
1442
0
  const V mix0 = Set(df, mixi0);
1443
0
  const V mix1 = Set(df, mixi1);
1444
0
  const V mix2 = Set(df, mixi2);
1445
0
  const V mix3 = Set(df, mixi3);
1446
0
  const V mix4 = Set(df, mixi4);
1447
0
  const V mix5 = Set(df, mixi5);
1448
0
  const V mix6 = Set(df, mixi6);
1449
0
  const V mix7 = Set(df, mixi7);
1450
0
  const V mix8 = Set(df, mixi8);
1451
0
  const V mix9 = Set(df, mixi9);
1452
0
  const V mix10 = Set(df, mixi10);
1453
0
  const V mix11 = Set(df, mixi11);
1454
1455
0
  *out0 = MulAdd(mix0, in0, MulAdd(mix1, in1, MulAdd(mix2, in2, mix3)));
1456
0
  *out1 = MulAdd(mix4, in0, MulAdd(mix5, in1, MulAdd(mix6, in2, mix7)));
1457
0
  *out2 = MulAdd(mix8, in0, MulAdd(mix9, in1, MulAdd(mix10, in2, mix11)));
1458
1459
0
  if (Clamp) {
1460
0
    *out0 = Max(*out0, mix3);
1461
0
    *out1 = Max(*out1, mix7);
1462
0
    *out2 = Max(*out2, mix11);
1463
0
  }
1464
0
}
Unexecuted instantiation: void jxl::N_SSE4::OpsinAbsorbance<true, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*)
Unexecuted instantiation: void jxl::N_SSE4::OpsinAbsorbance<false, hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Simd<float, 4ul, 0>, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul> const&, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*)
Unexecuted instantiation: void jxl::N_AVX2::OpsinAbsorbance<true, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*)
Unexecuted instantiation: void jxl::N_AVX2::OpsinAbsorbance<false, hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Simd<float, 8ul, 0>, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float> const&, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*)
Unexecuted instantiation: void jxl::N_SSE2::OpsinAbsorbance<true, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*)
Unexecuted instantiation: void jxl::N_SSE2::OpsinAbsorbance<false, hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Simd<float, 4ul, 0>, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul> const&, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*)
1465
1466
// `blurred` is a temporary image used inside this function and not returned.
1467
Status OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params,
1468
0
                          Image3F* blurred, BlurTemp* blur_temp, Image3F* xyb) {
1469
0
  JXL_ENSURE(blurred != nullptr);
1470
0
  const double kSigma = 1.2;
1471
0
  JXL_RETURN_IF_ERROR(
1472
0
      Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0)));
1473
0
  JXL_RETURN_IF_ERROR(
1474
0
      Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1)));
1475
0
  JXL_RETURN_IF_ERROR(
1476
0
      Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2)));
1477
0
  const HWY_FULL(float) df;
1478
0
  const auto intensity_target_multiplier = Set(df, params.intensity_target);
1479
0
  for (size_t y = 0; y < rgb.ysize(); ++y) {
1480
0
    const float* row_r = rgb.ConstPlaneRow(0, y);
1481
0
    const float* row_g = rgb.ConstPlaneRow(1, y);
1482
0
    const float* row_b = rgb.ConstPlaneRow(2, y);
1483
0
    const float* row_blurred_r = blurred->ConstPlaneRow(0, y);
1484
0
    const float* row_blurred_g = blurred->ConstPlaneRow(1, y);
1485
0
    const float* row_blurred_b = blurred->ConstPlaneRow(2, y);
1486
0
    float* row_out_x = xyb->PlaneRow(0, y);
1487
0
    float* row_out_y = xyb->PlaneRow(1, y);
1488
0
    float* row_out_b = xyb->PlaneRow(2, y);
1489
0
    const auto min = Set(df, 1e-4f);
1490
0
    for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) {
1491
0
      auto sensitivity0 = Undefined(df);
1492
0
      auto sensitivity1 = Undefined(df);
1493
0
      auto sensitivity2 = Undefined(df);
1494
0
      {
1495
        // Calculate sensitivity based on the smoothed image gamma derivative.
1496
0
        auto pre_mixed0 = Undefined(df);
1497
0
        auto pre_mixed1 = Undefined(df);
1498
0
        auto pre_mixed2 = Undefined(df);
1499
0
        OpsinAbsorbance<true>(
1500
0
            df, Mul(Load(df, row_blurred_r + x), intensity_target_multiplier),
1501
0
            Mul(Load(df, row_blurred_g + x), intensity_target_multiplier),
1502
0
            Mul(Load(df, row_blurred_b + x), intensity_target_multiplier),
1503
0
            &pre_mixed0, &pre_mixed1, &pre_mixed2);
1504
0
        pre_mixed0 = Max(pre_mixed0, min);
1505
0
        pre_mixed1 = Max(pre_mixed1, min);
1506
0
        pre_mixed2 = Max(pre_mixed2, min);
1507
0
        sensitivity0 = Div(Gamma(df, pre_mixed0), pre_mixed0);
1508
0
        sensitivity1 = Div(Gamma(df, pre_mixed1), pre_mixed1);
1509
0
        sensitivity2 = Div(Gamma(df, pre_mixed2), pre_mixed2);
1510
0
        sensitivity0 = Max(sensitivity0, min);
1511
0
        sensitivity1 = Max(sensitivity1, min);
1512
0
        sensitivity2 = Max(sensitivity2, min);
1513
0
      }
1514
0
      auto cur_mixed0 = Undefined(df);
1515
0
      auto cur_mixed1 = Undefined(df);
1516
0
      auto cur_mixed2 = Undefined(df);
1517
0
      OpsinAbsorbance<false>(
1518
0
          df, Mul(Load(df, row_r + x), intensity_target_multiplier),
1519
0
          Mul(Load(df, row_g + x), intensity_target_multiplier),
1520
0
          Mul(Load(df, row_b + x), intensity_target_multiplier), &cur_mixed0,
1521
0
          &cur_mixed1, &cur_mixed2);
1522
0
      cur_mixed0 = Mul(cur_mixed0, sensitivity0);
1523
0
      cur_mixed1 = Mul(cur_mixed1, sensitivity1);
1524
0
      cur_mixed2 = Mul(cur_mixed2, sensitivity2);
1525
      // This is a kludge. The negative values should be zeroed away before
1526
      // blurring. Ideally there would be no negative values in the first place.
1527
0
      const auto min01 = Set(df, 1.7557483643287353f);
1528
0
      const auto min2 = Set(df, 12.226454707163354f);
1529
0
      cur_mixed0 = Max(cur_mixed0, min01);
1530
0
      cur_mixed1 = Max(cur_mixed1, min01);
1531
0
      cur_mixed2 = Max(cur_mixed2, min2);
1532
1533
0
      Store(Sub(cur_mixed0, cur_mixed1), df, row_out_x + x);
1534
0
      Store(Add(cur_mixed0, cur_mixed1), df, row_out_y + x);
1535
0
      Store(cur_mixed2, df, row_out_b + x);
1536
0
    }
1537
0
  }
1538
0
  return true;
1539
0
}
Unexecuted instantiation: jxl::N_SSE4::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Image3<float>*)
Unexecuted instantiation: jxl::N_AVX2::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Image3<float>*)
Unexecuted instantiation: jxl::N_SSE2::OpsinDynamicsImage(jxl::Image3<float> const&, jxl::ButteraugliParams const&, jxl::Image3<float>*, jxl::BlurTemp*, jxl::Image3<float>*)
1540
1541
Status ButteraugliDiffmapInPlace(Image3F& image0, Image3F& image1,
1542
                                 const ButteraugliParams& params,
1543
0
                                 ImageF& diffmap) {
1544
  // image0 and image1 are in linear sRGB color space
1545
0
  const size_t xsize = image0.xsize();
1546
0
  const size_t ysize = image0.ysize();
1547
0
  JxlMemoryManager* memory_manager = image0.memory_manager();
1548
0
  BlurTemp blur_temp;
1549
0
  {
1550
    // Convert image0 and image1 to XYB in-place
1551
0
    JXL_ASSIGN_OR_RETURN(Image3F temp,
1552
0
                         Image3F::Create(memory_manager, xsize, ysize));
1553
0
    JXL_RETURN_IF_ERROR(
1554
0
        OpsinDynamicsImage(image0, params, &temp, &blur_temp, &image0));
1555
0
    JXL_RETURN_IF_ERROR(
1556
0
        OpsinDynamicsImage(image1, params, &temp, &blur_temp, &image1));
1557
0
  }
1558
  // image0 and image1 are in XYB color space
1559
0
  JXL_ASSIGN_OR_RETURN(ImageF block_diff_dc,
1560
0
                       ImageF::Create(memory_manager, xsize, ysize));
1561
0
  ZeroFillImage(&block_diff_dc);
1562
0
  {
1563
    // separate out LF components from image0 and image1 and compute the dc
1564
    // diff image from them
1565
0
    JXL_ASSIGN_OR_RETURN(Image3F lf0,
1566
0
                         Image3F::Create(memory_manager, xsize, ysize));
1567
0
    JXL_ASSIGN_OR_RETURN(Image3F lf1,
1568
0
                         Image3F::Create(memory_manager, xsize, ysize));
1569
0
    JXL_RETURN_IF_ERROR(
1570
0
        SeparateLFAndMF(params, image0, &lf0, &image0, &blur_temp));
1571
0
    JXL_RETURN_IF_ERROR(
1572
0
        SeparateLFAndMF(params, image1, &lf1, &image1, &blur_temp));
1573
0
    for (size_t c = 0; c < 3; ++c) {
1574
0
      L2Diff(lf0.Plane(c), lf1.Plane(c), wmul[6 + c], &block_diff_dc);
1575
0
    }
1576
0
  }
1577
  // image0 and image1 are MF residuals (before blurring) in XYB color space
1578
0
  ImageF hf0[2];
1579
0
  ImageF hf1[2];
1580
0
  JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image0, &hf0[0], &blur_temp));
1581
0
  JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image1, &hf1[0], &blur_temp));
1582
  // image0 and image1 are MF-images in XYB color space
1583
1584
0
  JXL_ASSIGN_OR_RETURN(ImageF block_diff_ac,
1585
0
                       ImageF::Create(memory_manager, xsize, ysize));
1586
0
  ZeroFillImage(&block_diff_ac);
1587
  // start accumulating ac diff image from MF images
1588
0
  {
1589
0
    JXL_ASSIGN_OR_RETURN(ImageF diffs,
1590
0
                         ImageF::Create(memory_manager, xsize, ysize));
1591
0
    JXL_RETURN_IF_ERROR(MaltaDiffMapLF(image0.Plane(1), image1.Plane(1),
1592
0
                                       wMfMalta, wMfMalta, norm1Mf, &diffs,
1593
0
                                       &block_diff_ac));
1594
0
    JXL_RETURN_IF_ERROR(MaltaDiffMapLF(image0.Plane(0), image1.Plane(0),
1595
0
                                       wMfMaltaX, wMfMaltaX, norm1MfX, &diffs,
1596
0
                                       &block_diff_ac));
1597
0
  }
1598
0
  for (size_t c = 0; c < 3; ++c) {
1599
0
    L2Diff(image0.Plane(c), image1.Plane(c), wmul[3 + c], &block_diff_ac);
1600
0
  }
1601
  // we will not need the MF-images and more, so we deallocate them to reduce
1602
  // peak memory usage
1603
0
  image0 = Image3F();
1604
0
  image1 = Image3F();
1605
1606
0
  ImageF uhf0[2];
1607
0
  ImageF uhf1[2];
1608
0
  JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf0[0], &uhf0[0], &blur_temp));
1609
0
  JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf1[0], &uhf1[0], &blur_temp));
1610
1611
  // continue accumulating ac diff image from HF and UHF images
1612
0
  const float hf_asymmetry = params.hf_asymmetry;
1613
0
  {
1614
0
    JXL_ASSIGN_OR_RETURN(ImageF diffs,
1615
0
                         ImageF::Create(memory_manager, xsize, ysize));
1616
0
    JXL_RETURN_IF_ERROR(MaltaDiffMap(uhf0[1], uhf1[1], wUhfMalta * hf_asymmetry,
1617
0
                                     wUhfMalta / hf_asymmetry, norm1Uhf, &diffs,
1618
0
                                     &block_diff_ac));
1619
0
    JXL_RETURN_IF_ERROR(MaltaDiffMap(
1620
0
        uhf0[0], uhf1[0], wUhfMaltaX * hf_asymmetry, wUhfMaltaX / hf_asymmetry,
1621
0
        norm1UhfX, &diffs, &block_diff_ac));
1622
0
    JXL_RETURN_IF_ERROR(MaltaDiffMapLF(
1623
0
        hf0[1], hf1[1], wHfMalta * std::sqrt(hf_asymmetry),
1624
0
        wHfMalta / std::sqrt(hf_asymmetry), norm1Hf, &diffs, &block_diff_ac));
1625
0
    JXL_RETURN_IF_ERROR(MaltaDiffMapLF(
1626
0
        hf0[0], hf1[0], wHfMaltaX * std::sqrt(hf_asymmetry),
1627
0
        wHfMaltaX / std::sqrt(hf_asymmetry), norm1HfX, &diffs, &block_diff_ac));
1628
0
  }
1629
0
  for (size_t c = 0; c < 2; ++c) {
1630
0
    L2DiffAsymmetric(hf0[c], hf1[c], wmul[c] * hf_asymmetry,
1631
0
                     wmul[c] / hf_asymmetry, &block_diff_ac);
1632
0
  }
1633
1634
  // compute mask image from HF and UHF X and Y images
1635
0
  JXL_ASSIGN_OR_RETURN(ImageF mask,
1636
0
                       ImageF::Create(memory_manager, xsize, ysize));
1637
0
  {
1638
0
    JXL_ASSIGN_OR_RETURN(ImageF mask0,
1639
0
                         ImageF::Create(memory_manager, xsize, ysize));
1640
0
    JXL_ASSIGN_OR_RETURN(ImageF mask1,
1641
0
                         ImageF::Create(memory_manager, xsize, ysize));
1642
0
    CombineChannelsForMasking(&hf0[0], &uhf0[0], &mask0);
1643
0
    CombineChannelsForMasking(&hf1[0], &uhf1[0], &mask1);
1644
0
    DeallocateHFAndUHF(&hf1[0], &uhf1[0]);
1645
0
    DeallocateHFAndUHF(&hf0[0], &uhf0[0]);
1646
0
    JXL_RETURN_IF_ERROR(
1647
0
        Mask(mask0, mask1, params, &blur_temp, &mask, &block_diff_ac));
1648
0
  }
1649
1650
  // compute final diffmap from mask image and ac and dc diff images
1651
0
  JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(memory_manager, xsize, ysize));
1652
0
  for (size_t y = 0; y < ysize; ++y) {
1653
0
    const float* row_dc = block_diff_dc.Row(y);
1654
0
    const float* row_ac = block_diff_ac.Row(y);
1655
0
    float* row_out = diffmap.Row(y);
1656
0
    for (size_t x = 0; x < xsize; ++x) {
1657
0
      const float val = mask.Row(y)[x];
1658
0
      row_out[x] = sqrt(row_dc[x] * MaskDcY(val) + row_ac[x] * MaskY(val));
1659
0
    }
1660
0
  }
1661
0
  return true;
1662
0
}
Unexecuted instantiation: jxl::N_SSE4::ButteraugliDiffmapInPlace(jxl::Image3<float>&, jxl::Image3<float>&, jxl::ButteraugliParams const&, jxl::Plane<float>&)
Unexecuted instantiation: jxl::N_AVX2::ButteraugliDiffmapInPlace(jxl::Image3<float>&, jxl::Image3<float>&, jxl::ButteraugliParams const&, jxl::Plane<float>&)
Unexecuted instantiation: jxl::N_SSE2::ButteraugliDiffmapInPlace(jxl::Image3<float>&, jxl::Image3<float>&, jxl::ButteraugliParams const&, jxl::Plane<float>&)
1663
1664
// NOLINTNEXTLINE(google-readability-namespace-comments)
1665
}  // namespace HWY_NAMESPACE
1666
}  // namespace jxl
1667
HWY_AFTER_NAMESPACE();
1668
1669
#if HWY_ONCE
1670
namespace jxl {
1671
1672
HWY_EXPORT(SeparateFrequencies);       // Local function.
1673
HWY_EXPORT(MaskPsychoImage);           // Local function.
1674
HWY_EXPORT(L2DiffAsymmetric);          // Local function.
1675
HWY_EXPORT(L2Diff);                    // Local function.
1676
HWY_EXPORT(SetL2Diff);                 // Local function.
1677
HWY_EXPORT(CombineChannelsToDiffmap);  // Local function.
1678
HWY_EXPORT(MaltaDiffMap);              // Local function.
1679
HWY_EXPORT(MaltaDiffMapLF);            // Local function.
1680
HWY_EXPORT(OpsinDynamicsImage);        // Local function.
1681
HWY_EXPORT(ButteraugliDiffmapInPlace);  // Local function.
1682
1683
#if BUTTERAUGLI_ENABLE_CHECKS
1684
1685
static inline bool IsNan(const float x) {
1686
  uint32_t bits;
1687
  memcpy(&bits, &x, sizeof(bits));
1688
  const uint32_t bitmask_exp = 0x7F800000;
1689
  return (bits & bitmask_exp) == bitmask_exp && (bits & 0x7FFFFF);
1690
}
1691
1692
static inline bool IsNan(const double x) {
1693
  uint64_t bits;
1694
  memcpy(&bits, &x, sizeof(bits));
1695
  return (0x7ff0000000000001ULL <= bits && bits <= 0x7fffffffffffffffULL) ||
1696
         (0xfff0000000000001ULL <= bits && bits <= 0xffffffffffffffffULL);
1697
}
1698
1699
static inline void CheckImage(const ImageF& image, const char* name) {
1700
  for (size_t y = 0; y < image.ysize(); ++y) {
1701
    const float* BUTTERAUGLI_RESTRICT row = image.Row(y);
1702
    for (size_t x = 0; x < image.xsize(); ++x) {
1703
      if (IsNan(row[x])) {
1704
        printf("NAN: Image %s @ %" PRIuS ",%" PRIuS " (of %" PRIuS ",%" PRIuS
1705
               ")\n",
1706
               name, x, y, image.xsize(), image.ysize());
1707
        exit(1);
1708
      }
1709
    }
1710
  }
1711
}
1712
1713
#define CHECK_NAN(x, str)                \
1714
  do {                                   \
1715
    if (IsNan(x)) {                      \
1716
      printf("%d: %s\n", __LINE__, str); \
1717
      abort();                           \
1718
    }                                    \
1719
  } while (0)
1720
1721
#define CHECK_IMAGE(image, name) CheckImage(image, name)
1722
1723
#else  // BUTTERAUGLI_ENABLE_CHECKS
1724
1725
#define CHECK_NAN(x, str)
1726
#define CHECK_IMAGE(image, name)
1727
1728
#endif  // BUTTERAUGLI_ENABLE_CHECKS
1729
1730
// Calculate a 2x2 subsampled image for purposes of recursive butteraugli at
1731
// multiresolution.
1732
0
static StatusOr<Image3F> SubSample2x(const Image3F& in) {
1733
0
  size_t xs = (in.xsize() + 1) / 2;
1734
0
  size_t ys = (in.ysize() + 1) / 2;
1735
0
  JxlMemoryManager* memory_manager = in.memory_manager();
1736
0
  JXL_ASSIGN_OR_RETURN(Image3F retval, Image3F::Create(memory_manager, xs, ys));
1737
0
  for (size_t c = 0; c < 3; ++c) {
1738
0
    for (size_t y = 0; y < ys; ++y) {
1739
0
      for (size_t x = 0; x < xs; ++x) {
1740
0
        retval.PlaneRow(c, y)[x] = 0;
1741
0
      }
1742
0
    }
1743
0
  }
1744
0
  for (size_t c = 0; c < 3; ++c) {
1745
0
    for (size_t y = 0; y < in.ysize(); ++y) {
1746
0
      for (size_t x = 0; x < in.xsize(); ++x) {
1747
0
        retval.PlaneRow(c, y / 2)[x / 2] += 0.25f * in.PlaneRow(c, y)[x];
1748
0
      }
1749
0
    }
1750
0
    if ((in.xsize() & 1) != 0) {
1751
0
      for (size_t y = 0; y < retval.ysize(); ++y) {
1752
0
        size_t last_column = retval.xsize() - 1;
1753
0
        retval.PlaneRow(c, y)[last_column] *= 2.0f;
1754
0
      }
1755
0
    }
1756
0
    if ((in.ysize() & 1) != 0) {
1757
0
      for (size_t x = 0; x < retval.xsize(); ++x) {
1758
0
        size_t last_row = retval.ysize() - 1;
1759
0
        retval.PlaneRow(c, last_row)[x] *= 2.0f;
1760
0
      }
1761
0
    }
1762
0
  }
1763
0
  return retval;
1764
0
}
1765
1766
// Supersample src by 2x and add it to dest.
1767
0
static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) {
1768
0
  for (size_t y = 0; y < dest.ysize(); ++y) {
1769
0
    for (size_t x = 0; x < dest.xsize(); ++x) {
1770
      // There will be less errors from the more averaged images.
1771
      // We take it into account to some extent using a scaler.
1772
0
      static const double kHeuristicMixingValue = 0.3;
1773
0
      dest.Row(y)[x] *= 1.0 - kHeuristicMixingValue * w;
1774
0
      dest.Row(y)[x] += w * src.Row(y / 2)[x / 2];
1775
0
    }
1776
0
  }
1777
0
}
1778
1779
0
Image3F* ButteraugliComparator::Temp() const {
1780
0
  bool was_in_use = temp_in_use_.test_and_set(std::memory_order_acq_rel);
1781
0
  if (was_in_use) return nullptr;
1782
0
  return &temp_;
1783
0
}
1784
1785
0
void ButteraugliComparator::ReleaseTemp() const { temp_in_use_.clear(); }
1786
1787
ButteraugliComparator::ButteraugliComparator(size_t xsize, size_t ysize,
1788
                                             const ButteraugliParams& params)
1789
0
    : xsize_(xsize), ysize_(ysize), params_(params) {}
1790
1791
StatusOr<std::unique_ptr<ButteraugliComparator>> ButteraugliComparator::Make(
1792
0
    const Image3F& rgb0, const ButteraugliParams& params) {
1793
0
  size_t xsize = rgb0.xsize();
1794
0
  size_t ysize = rgb0.ysize();
1795
0
  JxlMemoryManager* memory_manager = rgb0.memory_manager();
1796
0
  std::unique_ptr<ButteraugliComparator> result =
1797
0
      std::unique_ptr<ButteraugliComparator>(
1798
0
          new ButteraugliComparator(xsize, ysize, params));
1799
0
  JXL_ASSIGN_OR_RETURN(result->temp_,
1800
0
                       Image3F::Create(memory_manager, xsize, ysize));
1801
1802
0
  if (xsize < 8 || ysize < 8) {
1803
0
    return result;
1804
0
  }
1805
1806
0
  JXL_ASSIGN_OR_RETURN(Image3F xyb0,
1807
0
                       Image3F::Create(memory_manager, xsize, ysize));
1808
0
  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
1809
0
      rgb0, params, result->Temp(), &result->blur_temp_, &xyb0));
1810
0
  result->ReleaseTemp();
1811
0
  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)(
1812
0
      xsize, ysize, params, &result->blur_temp_, xyb0, result->pi0_));
1813
1814
  // Awful recursive construction of samples of different resolution.
1815
  // This is an after-thought and possibly somewhat parallel in
1816
  // functionality with the PsychoImage multi-resolution approach.
1817
0
  JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb0, SubSample2x(rgb0));
1818
0
  JXL_ASSIGN_OR_RETURN(result->sub_,
1819
0
                       ButteraugliComparator::Make(subsampledRgb0, params));
1820
0
  return result;
1821
0
}
1822
1823
0
Status ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const {
1824
0
  return HWY_DYNAMIC_DISPATCH(MaskPsychoImage)(
1825
0
      pi0_, pi0_, xsize_, ysize_, params_, &blur_temp_, mask, nullptr);
1826
0
}
1827
1828
Status ButteraugliComparator::Diffmap(const Image3F& rgb1,
1829
0
                                      ImageF& result) const {
1830
0
  JxlMemoryManager* memory_manager = rgb1.memory_manager();
1831
0
  if (xsize_ < 8 || ysize_ < 8) {
1832
0
    ZeroFillImage(&result);
1833
0
    return true;
1834
0
  }
1835
0
  JXL_ASSIGN_OR_RETURN(Image3F xyb1,
1836
0
                       Image3F::Create(memory_manager, xsize_, ysize_));
1837
0
  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
1838
0
      rgb1, params_, Temp(), &blur_temp_, &xyb1));
1839
0
  ReleaseTemp();
1840
0
  JXL_RETURN_IF_ERROR(DiffmapOpsinDynamicsImage(xyb1, result));
1841
0
  if (sub_) {
1842
0
    if (sub_->xsize_ < 8 || sub_->ysize_ < 8) {
1843
0
      return true;
1844
0
    }
1845
0
    JXL_ASSIGN_OR_RETURN(
1846
0
        Image3F sub_xyb,
1847
0
        Image3F::Create(memory_manager, sub_->xsize_, sub_->ysize_));
1848
0
    JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb1, SubSample2x(rgb1));
1849
0
    JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
1850
0
        subsampledRgb1, params_, sub_->Temp(), &sub_->blur_temp_, &sub_xyb));
1851
0
    sub_->ReleaseTemp();
1852
0
    ImageF subresult;
1853
0
    JXL_RETURN_IF_ERROR(sub_->DiffmapOpsinDynamicsImage(sub_xyb, subresult));
1854
0
    AddSupersampled2x(subresult, 0.5, result);
1855
0
  }
1856
0
  return true;
1857
0
}
1858
1859
Status ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1,
1860
0
                                                        ImageF& result) const {
1861
0
  JxlMemoryManager* memory_manager = xyb1.memory_manager();
1862
0
  if (xsize_ < 8 || ysize_ < 8) {
1863
0
    ZeroFillImage(&result);
1864
0
    return true;
1865
0
  }
1866
0
  PsychoImage pi1;
1867
0
  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)(
1868
0
      xsize_, ysize_, params_, &blur_temp_, xyb1, pi1));
1869
0
  JXL_ASSIGN_OR_RETURN(result, ImageF::Create(memory_manager, xsize_, ysize_));
1870
0
  return DiffmapPsychoImage(pi1, result);
1871
0
}
1872
1873
namespace {
1874
1875
Status MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
1876
                    const double w_0lt1, const double norm1,
1877
                    ImageF* HWY_RESTRICT diffs,
1878
0
                    Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
1879
0
  return HWY_DYNAMIC_DISPATCH(MaltaDiffMap)(lum0, lum1, w_0gt1, w_0lt1, norm1,
1880
0
                                            diffs, &block_diff_ac->Plane(c));
1881
0
}
1882
1883
Status MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1,
1884
                      const double w_0gt1, const double w_0lt1,
1885
                      const double norm1, ImageF* HWY_RESTRICT diffs,
1886
0
                      Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
1887
0
  return HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF)(lum0, lum1, w_0gt1, w_0lt1, norm1,
1888
0
                                              diffs, &block_diff_ac->Plane(c));
1889
0
}
1890
1891
}  // namespace
1892
1893
Status ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1,
1894
0
                                                 ImageF& diffmap) const {
1895
0
  JxlMemoryManager* memory_manager = diffmap.memory_manager();
1896
0
  if (xsize_ < 8 || ysize_ < 8) {
1897
0
    ZeroFillImage(&diffmap);
1898
0
    return true;
1899
0
  }
1900
1901
0
  const float hf_asymmetry_ = params_.hf_asymmetry;
1902
0
  const float xmul_ = params_.xmul;
1903
1904
0
  JXL_ASSIGN_OR_RETURN(ImageF diffs,
1905
0
                       ImageF::Create(memory_manager, xsize_, ysize_));
1906
0
  JXL_ASSIGN_OR_RETURN(Image3F block_diff_ac,
1907
0
                       Image3F::Create(memory_manager, xsize_, ysize_));
1908
0
  ZeroFillImage(&block_diff_ac);
1909
0
  JXL_RETURN_IF_ERROR(MaltaDiffMap(
1910
0
      pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_,
1911
0
      wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1));
1912
0
  JXL_RETURN_IF_ERROR(MaltaDiffMap(
1913
0
      pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_,
1914
0
      wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac, 0));
1915
0
  JXL_RETURN_IF_ERROR(MaltaDiffMapLF(
1916
0
      pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_),
1917
0
      wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs, &block_diff_ac, 1));
1918
0
  JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0],
1919
0
                                     wHfMaltaX * std::sqrt(hf_asymmetry_),
1920
0
                                     wHfMaltaX / std::sqrt(hf_asymmetry_),
1921
0
                                     norm1HfX, &diffs, &block_diff_ac, 0));
1922
0
  JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1),
1923
0
                                     wMfMalta, wMfMalta, norm1Mf, &diffs,
1924
0
                                     &block_diff_ac, 1));
1925
0
  JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0),
1926
0
                                     wMfMaltaX, wMfMaltaX, norm1MfX, &diffs,
1927
0
                                     &block_diff_ac, 0));
1928
1929
0
  JXL_ASSIGN_OR_RETURN(Image3F block_diff_dc,
1930
0
                       Image3F::Create(memory_manager, xsize_, ysize_));
1931
0
  for (size_t c = 0; c < 3; ++c) {
1932
0
    if (c < 2) {  // No blue channel error accumulated at HF.
1933
0
      HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric)
1934
0
      (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_,
1935
0
       &block_diff_ac.Plane(c));
1936
0
    }
1937
0
    HWY_DYNAMIC_DISPATCH(L2Diff)
1938
0
    (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac.Plane(c));
1939
0
    HWY_DYNAMIC_DISPATCH(SetL2Diff)
1940
0
    (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc.Plane(c));
1941
0
  }
1942
1943
0
  ImageF mask;
1944
0
  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(MaskPsychoImage)(
1945
0
      pi0_, pi1, xsize_, ysize_, params_, &blur_temp_, &mask,
1946
0
      &block_diff_ac.Plane(1)));
1947
1948
0
  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap)(
1949
0
      mask, block_diff_dc, block_diff_ac, xmul_, &diffmap));
1950
0
  return true;
1951
0
}
1952
1953
double ButteraugliScoreFromDiffmap(const ImageF& diffmap,
1954
0
                                   const ButteraugliParams* params) {
1955
0
  float retval = 0.0f;
1956
0
  for (size_t y = 0; y < diffmap.ysize(); ++y) {
1957
0
    const float* BUTTERAUGLI_RESTRICT row = diffmap.ConstRow(y);
1958
0
    for (size_t x = 0; x < diffmap.xsize(); ++x) {
1959
0
      retval = std::max(retval, row[x]);
1960
0
    }
1961
0
  }
1962
0
  return retval;
1963
0
}
1964
1965
Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
1966
0
                          double hf_asymmetry, double xmul, ImageF& diffmap) {
1967
0
  ButteraugliParams params;
1968
0
  params.hf_asymmetry = hf_asymmetry;
1969
0
  params.xmul = xmul;
1970
0
  return ButteraugliDiffmap(rgb0, rgb1, params, diffmap);
1971
0
}
1972
1973
template <size_t kMax>
1974
bool ButteraugliDiffmapSmall(const Image3F& rgb0, const Image3F& rgb1,
1975
0
                             const ButteraugliParams& params, ImageF& diffmap) {
1976
0
  const size_t xsize = rgb0.xsize();
1977
0
  const size_t ysize = rgb0.ysize();
1978
0
  JxlMemoryManager* memory_manager = rgb0.memory_manager();
1979
  // Butteraugli values for small (where xsize or ysize is smaller
1980
  // than 8 pixels) images are non-sensical, but most likely it is
1981
  // less disruptive to try to compute something than just give up.
1982
  // Temporarily extend the borders of the image to fit 8 x 8 size.
1983
0
  size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0;
1984
0
  size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0;
1985
0
  size_t xscaled = std::max<size_t>(kMax, xsize);
1986
0
  size_t yscaled = std::max<size_t>(kMax, ysize);
1987
0
  JXL_ASSIGN_OR_RETURN(Image3F scaled0,
1988
0
                       Image3F::Create(memory_manager, xscaled, yscaled));
1989
0
  JXL_ASSIGN_OR_RETURN(Image3F scaled1,
1990
0
                       Image3F::Create(memory_manager, xscaled, yscaled));
1991
0
  for (int i = 0; i < 3; ++i) {
1992
0
    for (size_t y = 0; y < yscaled; ++y) {
1993
0
      for (size_t x = 0; x < xscaled; ++x) {
1994
0
        size_t x2 = std::min<size_t>(xsize - 1, x > xborder ? x - xborder : 0);
1995
0
        size_t y2 = std::min<size_t>(ysize - 1, y > yborder ? y - yborder : 0);
1996
0
        scaled0.PlaneRow(i, y)[x] = rgb0.PlaneRow(i, y2)[x2];
1997
0
        scaled1.PlaneRow(i, y)[x] = rgb1.PlaneRow(i, y2)[x2];
1998
0
      }
1999
0
    }
2000
0
  }
2001
0
  ImageF diffmap_scaled;
2002
0
  const bool ok = ButteraugliDiffmap(scaled0, scaled1, params, diffmap_scaled);
2003
0
  JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(memory_manager, xsize, ysize));
2004
0
  for (size_t y = 0; y < ysize; ++y) {
2005
0
    for (size_t x = 0; x < xsize; ++x) {
2006
0
      diffmap.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder];
2007
0
    }
2008
0
  }
2009
0
  return ok;
2010
0
}
2011
2012
Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
2013
0
                          const ButteraugliParams& params, ImageF& diffmap) {
2014
0
  const size_t xsize = rgb0.xsize();
2015
0
  const size_t ysize = rgb0.ysize();
2016
0
  if (xsize < 1 || ysize < 1) {
2017
0
    return JXL_FAILURE("Zero-sized image");
2018
0
  }
2019
0
  if (!SameSize(rgb0, rgb1)) {
2020
0
    return JXL_FAILURE("Size mismatch");
2021
0
  }
2022
0
  static const int kMax = 8;
2023
0
  if (xsize < kMax || ysize < kMax) {
2024
0
    return ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap);
2025
0
  }
2026
0
  JXL_ASSIGN_OR_RETURN(std::unique_ptr<ButteraugliComparator> butteraugli,
2027
0
                       ButteraugliComparator::Make(rgb0, params));
2028
0
  JXL_RETURN_IF_ERROR(butteraugli->Diffmap(rgb1, diffmap));
2029
0
  return true;
2030
0
}
2031
2032
bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
2033
                          float hf_asymmetry, float xmul, ImageF& diffmap,
2034
0
                          double& diffvalue) {
2035
0
  ButteraugliParams params;
2036
0
  params.hf_asymmetry = hf_asymmetry;
2037
0
  params.xmul = xmul;
2038
0
  return ButteraugliInterface(rgb0, rgb1, params, diffmap, diffvalue);
2039
0
}
2040
2041
bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
2042
                          const ButteraugliParams& params, ImageF& diffmap,
2043
0
                          double& diffvalue) {
2044
0
  if (!ButteraugliDiffmap(rgb0, rgb1, params, diffmap)) {
2045
0
    return false;
2046
0
  }
2047
0
  diffvalue = ButteraugliScoreFromDiffmap(diffmap, &params);
2048
0
  return true;
2049
0
}
2050
2051
Status ButteraugliInterfaceInPlace(Image3F&& rgb0, Image3F&& rgb1,
2052
                                   const ButteraugliParams& params,
2053
0
                                   ImageF& diffmap, double& diffvalue) {
2054
0
  const size_t xsize = rgb0.xsize();
2055
0
  const size_t ysize = rgb0.ysize();
2056
0
  if (xsize < 1 || ysize < 1) {
2057
0
    return JXL_FAILURE("Zero-sized image");
2058
0
  }
2059
0
  if (!SameSize(rgb0, rgb1)) {
2060
0
    return JXL_FAILURE("Size mismatch");
2061
0
  }
2062
0
  static const int kMax = 8;
2063
0
  if (xsize < kMax || ysize < kMax) {
2064
0
    bool ok = ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap);
2065
0
    diffvalue = ButteraugliScoreFromDiffmap(diffmap, &params);
2066
0
    return ok;
2067
0
  }
2068
0
  ImageF subdiffmap;
2069
0
  if (xsize >= 15 && ysize >= 15) {
2070
0
    JXL_ASSIGN_OR_RETURN(Image3F rgb0_sub, SubSample2x(rgb0));
2071
0
    JXL_ASSIGN_OR_RETURN(Image3F rgb1_sub, SubSample2x(rgb1));
2072
0
    JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)(
2073
0
        rgb0_sub, rgb1_sub, params, subdiffmap));
2074
0
  }
2075
0
  JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)(
2076
0
      rgb0, rgb1, params, diffmap));
2077
0
  if (xsize >= 15 && ysize >= 15) {
2078
0
    AddSupersampled2x(subdiffmap, 0.5, diffmap);
2079
0
  }
2080
0
  diffvalue = ButteraugliScoreFromDiffmap(diffmap, &params);
2081
0
  return true;
2082
0
}
2083
2084
0
double ButteraugliFuzzyClass(double score) {
2085
0
  static const double fuzzy_width_up = 4.8;
2086
0
  static const double fuzzy_width_down = 4.8;
2087
0
  static const double m0 = 2.0;
2088
0
  static const double scaler = 0.7777;
2089
0
  double val;
2090
0
  if (score < 1.0) {
2091
    // val in [scaler .. 2.0]
2092
0
    val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_down));
2093
0
    val -= 1.0;           // from [1 .. 2] to [0 .. 1]
2094
0
    val *= 2.0 - scaler;  // from [0 .. 1] to [0 .. 2.0 - scaler]
2095
0
    val += scaler;        // from [0 .. 2.0 - scaler] to [scaler .. 2.0]
2096
0
  } else {
2097
    // val in [0 .. scaler]
2098
0
    val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_up));
2099
0
    val *= scaler;
2100
0
  }
2101
0
  return val;
2102
0
}
2103
2104
// #define PRINT_OUT_NORMALIZATION
2105
2106
0
double ButteraugliFuzzyInverse(double seek) {
2107
0
  double pos = 0;
2108
  // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
2109
0
  for (double range = 1.0; range >= 1e-10; range *= 0.5) {
2110
0
    double cur = ButteraugliFuzzyClass(pos);
2111
0
    if (cur < seek) {
2112
0
      pos -= range;
2113
0
    } else {
2114
0
      pos += range;
2115
0
    }
2116
0
  }
2117
#ifdef PRINT_OUT_NORMALIZATION
2118
  if (seek == 1.0) {
2119
    fprintf(stderr, "Fuzzy inverse %g\n", pos);
2120
  }
2121
#endif
2122
0
  return pos;
2123
0
}
2124
2125
#ifdef PRINT_OUT_NORMALIZATION
2126
static double print_out_normalization = ButteraugliFuzzyInverse(1.0);
2127
#endif
2128
2129
namespace {
2130
2131
void ScoreToRgb(double score, double good_threshold, double bad_threshold,
2132
0
                float rgb[3]) {
2133
0
  double heatmap[12][3] = {
2134
0
      {0, 0, 0},       {0, 0, 1},
2135
0
      {0, 1, 1},       {0, 1, 0},  // Good level
2136
0
      {1, 1, 0},       {1, 0, 0},  // Bad level
2137
0
      {1, 0, 1},       {0.5, 0.5, 1.0},
2138
0
      {1.0, 0.5, 0.5},  // Pastel colors for the very bad quality range.
2139
0
      {1.0, 1.0, 0.5}, {1, 1, 1},
2140
0
      {1, 1, 1},  // Last color repeated to have a solid range of white.
2141
0
  };
2142
0
  if (score < good_threshold) {
2143
0
    score = (score / good_threshold) * 0.3;
2144
0
  } else if (score < bad_threshold) {
2145
0
    score = 0.3 +
2146
0
            (score - good_threshold) / (bad_threshold - good_threshold) * 0.15;
2147
0
  } else {
2148
0
    score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5;
2149
0
  }
2150
0
  static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]);
2151
0
  score = jxl::Clamp1<double>(score * (kTableSize - 1), 0.0, kTableSize - 2);
2152
0
  int ix = static_cast<int>(score);
2153
0
  ix = jxl::Clamp1(ix, 0, kTableSize - 2);  // Handle NaN
2154
0
  double mix = score - ix;
2155
0
  for (int i = 0; i < 3; ++i) {
2156
0
    double v = mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i];
2157
0
    rgb[i] = pow(v, 0.5);
2158
0
  }
2159
0
}
2160
2161
}  // namespace
2162
2163
StatusOr<Image3F> CreateHeatMapImage(const ImageF& distmap,
2164
                                     double good_threshold,
2165
0
                                     double bad_threshold) {
2166
0
  JxlMemoryManager* memory_manager = distmap.memory_manager();
2167
0
  JXL_ASSIGN_OR_RETURN(
2168
0
      Image3F heatmap,
2169
0
      Image3F::Create(memory_manager, distmap.xsize(), distmap.ysize()));
2170
0
  for (size_t y = 0; y < distmap.ysize(); ++y) {
2171
0
    const float* BUTTERAUGLI_RESTRICT row_distmap = distmap.ConstRow(y);
2172
0
    float* BUTTERAUGLI_RESTRICT row_h0 = heatmap.PlaneRow(0, y);
2173
0
    float* BUTTERAUGLI_RESTRICT row_h1 = heatmap.PlaneRow(1, y);
2174
0
    float* BUTTERAUGLI_RESTRICT row_h2 = heatmap.PlaneRow(2, y);
2175
0
    for (size_t x = 0; x < distmap.xsize(); ++x) {
2176
0
      const float d = row_distmap[x];
2177
0
      float rgb[3];
2178
0
      ScoreToRgb(d, good_threshold, bad_threshold, rgb);
2179
0
      row_h0[x] = rgb[0];
2180
0
      row_h1[x] = rgb[1];
2181
0
      row_h2[x] = rgb[2];
2182
0
    }
2183
0
  }
2184
0
  return heatmap;
2185
0
}
2186
2187
}  // namespace jxl
2188
#endif  // HWY_ONCE