Coverage Report

Created: 2025-06-16 07:00

/src/libjxl/lib/jxl/enc_xyb.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_xyb.h"
7
8
#include <jxl/cms_interface.h>
9
#include <jxl/memory_manager.h>
10
11
#include <algorithm>
12
#include <cmath>
13
#include <cstdint>
14
#include <cstdlib>
15
16
#include "lib/jxl/base/common.h"
17
#include "lib/jxl/frame_dimensions.h"
18
#include "lib/jxl/image.h"
19
20
#undef HWY_TARGET_INCLUDE
21
#define HWY_TARGET_INCLUDE "lib/jxl/enc_xyb.cc"
22
#include <hwy/foreach_target.h>
23
#include <hwy/highway.h>
24
25
#include "lib/jxl/base/compiler_specific.h"
26
#include "lib/jxl/base/data_parallel.h"
27
#include "lib/jxl/base/fast_math-inl.h"
28
#include "lib/jxl/base/rect.h"
29
#include "lib/jxl/base/status.h"
30
#include "lib/jxl/cms/opsin_params.h"
31
#include "lib/jxl/cms/transfer_functions-inl.h"
32
#include "lib/jxl/color_encoding_internal.h"
33
#include "lib/jxl/enc_image_bundle.h"
34
#include "lib/jxl/image_ops.h"
35
#include "lib/jxl/memory_manager_internal.h"
36
37
HWY_BEFORE_NAMESPACE();
38
namespace jxl {
39
namespace HWY_NAMESPACE {
40
41
// These templates are not found via ADL.
42
using hwy::HWY_NAMESPACE::Add;
43
using hwy::HWY_NAMESPACE::Mul;
44
using hwy::HWY_NAMESPACE::MulAdd;
45
using hwy::HWY_NAMESPACE::Sub;
46
using hwy::HWY_NAMESPACE::ZeroIfNegative;
47
48
// 3x3 matrix * 3x1 SIMD vectors, plus a per-row bias.
49
template <class V>
50
JXL_INLINE void OpsinAbsorbance(const V r, const V g, const V b,
51
                                const float* JXL_RESTRICT premul_absorb,
52
                                V* JXL_RESTRICT mixed0, V* JXL_RESTRICT mixed1,
53
3.16M
                                V* JXL_RESTRICT mixed2) {
54
3.16M
  const float* bias = jxl::cms::kOpsinAbsorbanceBias.data();
55
3.16M
  const HWY_FULL(float) d;
56
3.16M
  const size_t N = Lanes(d);
57
3.16M
  const auto m0 = Load(d, premul_absorb + 0 * N);
58
3.16M
  const auto m1 = Load(d, premul_absorb + 1 * N);
59
3.16M
  const auto m2 = Load(d, premul_absorb + 2 * N);
60
3.16M
  const auto m3 = Load(d, premul_absorb + 3 * N);
61
3.16M
  const auto m4 = Load(d, premul_absorb + 4 * N);
62
3.16M
  const auto m5 = Load(d, premul_absorb + 5 * N);
63
3.16M
  const auto m6 = Load(d, premul_absorb + 6 * N);
64
3.16M
  const auto m7 = Load(d, premul_absorb + 7 * N);
65
3.16M
  const auto m8 = Load(d, premul_absorb + 8 * N);
66
3.16M
  *mixed0 = MulAdd(m0, r, MulAdd(m1, g, MulAdd(m2, b, Set(d, bias[0]))));
67
3.16M
  *mixed1 = MulAdd(m3, r, MulAdd(m4, g, MulAdd(m5, b, Set(d, bias[1]))));
68
3.16M
  *mixed2 = MulAdd(m6, r, MulAdd(m7, g, MulAdd(m8, b, Set(d, bias[2]))));
69
3.16M
}
Unexecuted instantiation: void jxl::N_SSE4::OpsinAbsorbance<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, float const*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*, hwy::N_SSE4::Vec128<float, 4ul>*)
void jxl::N_AVX2::OpsinAbsorbance<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, float const*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*, hwy::N_AVX2::Vec256<float>*)
Line
Count
Source
53
3.16M
                                V* JXL_RESTRICT mixed2) {
54
3.16M
  const float* bias = jxl::cms::kOpsinAbsorbanceBias.data();
55
3.16M
  const HWY_FULL(float) d;
56
3.16M
  const size_t N = Lanes(d);
57
3.16M
  const auto m0 = Load(d, premul_absorb + 0 * N);
58
3.16M
  const auto m1 = Load(d, premul_absorb + 1 * N);
59
3.16M
  const auto m2 = Load(d, premul_absorb + 2 * N);
60
3.16M
  const auto m3 = Load(d, premul_absorb + 3 * N);
61
3.16M
  const auto m4 = Load(d, premul_absorb + 4 * N);
62
3.16M
  const auto m5 = Load(d, premul_absorb + 5 * N);
63
3.16M
  const auto m6 = Load(d, premul_absorb + 6 * N);
64
3.16M
  const auto m7 = Load(d, premul_absorb + 7 * N);
65
3.16M
  const auto m8 = Load(d, premul_absorb + 8 * N);
66
3.16M
  *mixed0 = MulAdd(m0, r, MulAdd(m1, g, MulAdd(m2, b, Set(d, bias[0]))));
67
3.16M
  *mixed1 = MulAdd(m3, r, MulAdd(m4, g, MulAdd(m5, b, Set(d, bias[1]))));
68
3.16M
  *mixed2 = MulAdd(m6, r, MulAdd(m7, g, MulAdd(m8, b, Set(d, bias[2]))));
69
3.16M
}
Unexecuted instantiation: void jxl::N_SSE2::OpsinAbsorbance<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, float const*, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*, hwy::N_SSE2::Vec128<float, 4ul>*)
70
71
template <class V>
72
void StoreXYB(const V r, V g, const V b, float* JXL_RESTRICT valx,
73
3.16M
              float* JXL_RESTRICT valy, float* JXL_RESTRICT valz) {
74
3.16M
  const HWY_FULL(float) d;
75
3.16M
  const V half = Set(d, 0.5f);
76
3.16M
  Store(Mul(half, Sub(r, g)), d, valx);
77
3.16M
  Store(Mul(half, Add(r, g)), d, valy);
78
3.16M
  Store(b, d, valz);
79
3.16M
}
Unexecuted instantiation: void jxl::N_SSE4::StoreXYB<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, float*, float*, float*)
void jxl::N_AVX2::StoreXYB<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, float*, float*, float*)
Line
Count
Source
73
3.16M
              float* JXL_RESTRICT valy, float* JXL_RESTRICT valz) {
74
3.16M
  const HWY_FULL(float) d;
75
3.16M
  const V half = Set(d, 0.5f);
76
3.16M
  Store(Mul(half, Sub(r, g)), d, valx);
77
3.16M
  Store(Mul(half, Add(r, g)), d, valy);
78
3.16M
  Store(b, d, valz);
79
3.16M
}
Unexecuted instantiation: void jxl::N_SSE2::StoreXYB<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, float*, float*, float*)
80
81
// Converts one RGB vector to XYB.
82
template <class V>
83
void LinearRGBToXYB(const V r, const V g, const V b,
84
                    const float* JXL_RESTRICT premul_absorb,
85
                    float* JXL_RESTRICT valx, float* JXL_RESTRICT valy,
86
3.16M
                    float* JXL_RESTRICT valz) {
87
3.16M
  V mixed0;
88
3.16M
  V mixed1;
89
3.16M
  V mixed2;
90
3.16M
  OpsinAbsorbance(r, g, b, premul_absorb, &mixed0, &mixed1, &mixed2);
91
92
  // mixed* should be non-negative even for wide-gamut, so clamp to zero.
93
3.16M
  mixed0 = ZeroIfNegative(mixed0);
94
3.16M
  mixed1 = ZeroIfNegative(mixed1);
95
3.16M
  mixed2 = ZeroIfNegative(mixed2);
96
97
3.16M
  const HWY_FULL(float) d;
98
3.16M
  const size_t N = Lanes(d);
99
3.16M
  mixed0 = CubeRootAndAdd(mixed0, Load(d, premul_absorb + 9 * N));
100
3.16M
  mixed1 = CubeRootAndAdd(mixed1, Load(d, premul_absorb + 10 * N));
101
3.16M
  mixed2 = CubeRootAndAdd(mixed2, Load(d, premul_absorb + 11 * N));
102
3.16M
  StoreXYB(mixed0, mixed1, mixed2, valx, valy, valz);
103
104
  // For wide-gamut inputs, r/g/b and valx (but not y/z) are often negative.
105
3.16M
}
Unexecuted instantiation: void jxl::N_SSE4::LinearRGBToXYB<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, hwy::N_SSE4::Vec128<float, 4ul>, float const*, float*, float*, float*)
void jxl::N_AVX2::LinearRGBToXYB<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, hwy::N_AVX2::Vec256<float>, float const*, float*, float*, float*)
Line
Count
Source
86
3.16M
                    float* JXL_RESTRICT valz) {
87
3.16M
  V mixed0;
88
3.16M
  V mixed1;
89
3.16M
  V mixed2;
90
3.16M
  OpsinAbsorbance(r, g, b, premul_absorb, &mixed0, &mixed1, &mixed2);
91
92
  // mixed* should be non-negative even for wide-gamut, so clamp to zero.
93
3.16M
  mixed0 = ZeroIfNegative(mixed0);
94
3.16M
  mixed1 = ZeroIfNegative(mixed1);
95
3.16M
  mixed2 = ZeroIfNegative(mixed2);
96
97
3.16M
  const HWY_FULL(float) d;
98
3.16M
  const size_t N = Lanes(d);
99
3.16M
  mixed0 = CubeRootAndAdd(mixed0, Load(d, premul_absorb + 9 * N));
100
3.16M
  mixed1 = CubeRootAndAdd(mixed1, Load(d, premul_absorb + 10 * N));
101
3.16M
  mixed2 = CubeRootAndAdd(mixed2, Load(d, premul_absorb + 11 * N));
102
3.16M
  StoreXYB(mixed0, mixed1, mixed2, valx, valy, valz);
103
104
  // For wide-gamut inputs, r/g/b and valx (but not y/z) are often negative.
105
3.16M
}
Unexecuted instantiation: void jxl::N_SSE2::LinearRGBToXYB<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, hwy::N_SSE2::Vec128<float, 4ul>, float const*, float*, float*, float*)
106
107
void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
108
                       float* JXL_RESTRICT row2,
109
0
                       const float* JXL_RESTRICT premul_absorb, size_t xsize) {
110
0
  const HWY_FULL(float) d;
111
0
  for (size_t x = 0; x < xsize; x += Lanes(d)) {
112
0
    const auto r = Load(d, row0 + x);
113
0
    const auto g = Load(d, row1 + x);
114
0
    const auto b = Load(d, row2 + x);
115
0
    LinearRGBToXYB(r, g, b, premul_absorb, row0 + x, row1 + x, row2 + x);
116
0
  }
117
0
}
Unexecuted instantiation: jxl::N_SSE4::LinearRGBRowToXYB(float*, float*, float*, float const*, unsigned long)
Unexecuted instantiation: jxl::N_AVX2::LinearRGBRowToXYB(float*, float*, float*, float const*, unsigned long)
Unexecuted instantiation: jxl::N_SSE2::LinearRGBRowToXYB(float*, float*, float*, float const*, unsigned long)
118
119
// Input/output uses the codec.h scaling: nominally 0-1 if in-gamut.
120
template <class V>
121
9.50M
V LinearFromSRGB(V encoded) {
122
9.50M
  return TF_SRGB().DisplayFromEncoded(encoded);
123
9.50M
}
Unexecuted instantiation: hwy::N_SSE4::Vec128<float, 4ul> jxl::N_SSE4::LinearFromSRGB<hwy::N_SSE4::Vec128<float, 4ul> >(hwy::N_SSE4::Vec128<float, 4ul>)
hwy::N_AVX2::Vec256<float> jxl::N_AVX2::LinearFromSRGB<hwy::N_AVX2::Vec256<float> >(hwy::N_AVX2::Vec256<float>)
Line
Count
Source
121
9.50M
V LinearFromSRGB(V encoded) {
122
9.50M
  return TF_SRGB().DisplayFromEncoded(encoded);
123
9.50M
}
Unexecuted instantiation: hwy::N_SSE2::Vec128<float, 4ul> jxl::N_SSE2::LinearFromSRGB<hwy::N_SSE2::Vec128<float, 4ul> >(hwy::N_SSE2::Vec128<float, 4ul>)
124
125
Status LinearSRGBToXYB(const float* JXL_RESTRICT premul_absorb,
126
0
                       ThreadPool* pool, Image3F* JXL_RESTRICT image) {
127
0
  const size_t xsize = image->xsize();
128
129
0
  const HWY_FULL(float) d;
130
0
  const auto process_row = [&](const uint32_t task,
131
0
                               size_t /*thread*/) -> Status {
132
0
    const size_t y = static_cast<size_t>(task);
133
0
    float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
134
0
    float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
135
0
    float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
136
137
0
    for (size_t x = 0; x < xsize; x += Lanes(d)) {
138
0
      const auto in_r = Load(d, row0 + x);
139
0
      const auto in_g = Load(d, row1 + x);
140
0
      const auto in_b = Load(d, row2 + x);
141
0
      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
142
0
                     row2 + x);
143
0
    }
144
0
    return true;
145
0
  };
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE4::LinearSRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)::$_0::operator()(unsigned int, unsigned long) const
Unexecuted instantiation: enc_xyb.cc:jxl::N_AVX2::LinearSRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)::$_0::operator()(unsigned int, unsigned long) const
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE2::LinearSRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)::$_0::operator()(unsigned int, unsigned long) const
146
0
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
147
0
                                ThreadPool::NoInit, process_row,
148
0
                                "LinearToXYB"));
149
0
  return true;
150
0
}
Unexecuted instantiation: jxl::N_SSE4::LinearSRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)
Unexecuted instantiation: jxl::N_AVX2::LinearSRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)
Unexecuted instantiation: jxl::N_SSE2::LinearSRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)
151
152
Status SRGBToXYB(const float* JXL_RESTRICT premul_absorb, ThreadPool* pool,
153
186
                 Image3F* JXL_RESTRICT image) {
154
186
  const size_t xsize = image->xsize();
155
156
186
  const HWY_FULL(float) d;
157
186
  const auto process_row = [&](const uint32_t task,
158
58.2k
                               size_t /*thread*/) -> Status {
159
58.2k
    const size_t y = static_cast<size_t>(task);
160
58.2k
    float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
161
58.2k
    float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
162
58.2k
    float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
163
164
3.22M
    for (size_t x = 0; x < xsize; x += Lanes(d)) {
165
3.16M
      const auto in_r = LinearFromSRGB(Load(d, row0 + x));
166
3.16M
      const auto in_g = LinearFromSRGB(Load(d, row1 + x));
167
3.16M
      const auto in_b = LinearFromSRGB(Load(d, row2 + x));
168
3.16M
      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
169
3.16M
                     row2 + x);
170
3.16M
    }
171
58.2k
    return true;
172
58.2k
  };
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE4::SRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)::$_0::operator()(unsigned int, unsigned long) const
enc_xyb.cc:jxl::N_AVX2::SRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)::$_0::operator()(unsigned int, unsigned long) const
Line
Count
Source
158
58.2k
                               size_t /*thread*/) -> Status {
159
58.2k
    const size_t y = static_cast<size_t>(task);
160
58.2k
    float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
161
58.2k
    float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
162
58.2k
    float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
163
164
3.22M
    for (size_t x = 0; x < xsize; x += Lanes(d)) {
165
3.16M
      const auto in_r = LinearFromSRGB(Load(d, row0 + x));
166
3.16M
      const auto in_g = LinearFromSRGB(Load(d, row1 + x));
167
3.16M
      const auto in_b = LinearFromSRGB(Load(d, row2 + x));
168
3.16M
      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
169
3.16M
                     row2 + x);
170
3.16M
    }
171
58.2k
    return true;
172
58.2k
  };
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE2::SRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)::$_0::operator()(unsigned int, unsigned long) const
173
186
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
174
186
                                ThreadPool::NoInit, process_row, "SRGBToXYB"));
175
186
  return true;
176
186
}
Unexecuted instantiation: jxl::N_SSE4::SRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)
jxl::N_AVX2::SRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)
Line
Count
Source
153
186
                 Image3F* JXL_RESTRICT image) {
154
186
  const size_t xsize = image->xsize();
155
156
186
  const HWY_FULL(float) d;
157
186
  const auto process_row = [&](const uint32_t task,
158
186
                               size_t /*thread*/) -> Status {
159
186
    const size_t y = static_cast<size_t>(task);
160
186
    float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
161
186
    float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
162
186
    float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
163
164
186
    for (size_t x = 0; x < xsize; x += Lanes(d)) {
165
186
      const auto in_r = LinearFromSRGB(Load(d, row0 + x));
166
186
      const auto in_g = LinearFromSRGB(Load(d, row1 + x));
167
186
      const auto in_b = LinearFromSRGB(Load(d, row2 + x));
168
186
      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
169
186
                     row2 + x);
170
186
    }
171
186
    return true;
172
186
  };
173
186
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
174
186
                                ThreadPool::NoInit, process_row, "SRGBToXYB"));
175
186
  return true;
176
186
}
Unexecuted instantiation: jxl::N_SSE2::SRGBToXYB(float const*, jxl::ThreadPool*, jxl::Image3<float>*)
177
178
Status SRGBToXYBAndLinear(const float* JXL_RESTRICT premul_absorb,
179
                          ThreadPool* pool, Image3F* JXL_RESTRICT image,
180
0
                          Image3F* JXL_RESTRICT linear) {
181
0
  const size_t xsize = image->xsize();
182
183
0
  const HWY_FULL(float) d;
184
0
  const auto process_row = [&](const uint32_t task,
185
0
                               size_t /*thread*/) -> Status {
186
0
    const size_t y = static_cast<size_t>(task);
187
0
    float* JXL_RESTRICT row_image0 = image->PlaneRow(0, y);
188
0
    float* JXL_RESTRICT row_image1 = image->PlaneRow(1, y);
189
0
    float* JXL_RESTRICT row_image2 = image->PlaneRow(2, y);
190
0
    float* JXL_RESTRICT row_linear0 = linear->PlaneRow(0, y);
191
0
    float* JXL_RESTRICT row_linear1 = linear->PlaneRow(1, y);
192
0
    float* JXL_RESTRICT row_linear2 = linear->PlaneRow(2, y);
193
194
0
    for (size_t x = 0; x < xsize; x += Lanes(d)) {
195
0
      const auto in_r = LinearFromSRGB(Load(d, row_image0 + x));
196
0
      const auto in_g = LinearFromSRGB(Load(d, row_image1 + x));
197
0
      const auto in_b = LinearFromSRGB(Load(d, row_image2 + x));
198
199
0
      Store(in_r, d, row_linear0 + x);
200
0
      Store(in_g, d, row_linear1 + x);
201
0
      Store(in_b, d, row_linear2 + x);
202
203
0
      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_image0 + x,
204
0
                     row_image1 + x, row_image2 + x);
205
0
    }
206
0
    return true;
207
0
  };
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE4::SRGBToXYBAndLinear(float const*, jxl::ThreadPool*, jxl::Image3<float>*, jxl::Image3<float>*)::$_0::operator()(unsigned int, unsigned long) const
Unexecuted instantiation: enc_xyb.cc:jxl::N_AVX2::SRGBToXYBAndLinear(float const*, jxl::ThreadPool*, jxl::Image3<float>*, jxl::Image3<float>*)::$_0::operator()(unsigned int, unsigned long) const
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE2::SRGBToXYBAndLinear(float const*, jxl::ThreadPool*, jxl::Image3<float>*, jxl::Image3<float>*)::$_0::operator()(unsigned int, unsigned long) const
208
0
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
209
0
                                ThreadPool::NoInit, process_row,
210
0
                                "SRGBToXYBAndLinear"));
211
0
  return true;
212
0
}
Unexecuted instantiation: jxl::N_SSE4::SRGBToXYBAndLinear(float const*, jxl::ThreadPool*, jxl::Image3<float>*, jxl::Image3<float>*)
Unexecuted instantiation: jxl::N_AVX2::SRGBToXYBAndLinear(float const*, jxl::ThreadPool*, jxl::Image3<float>*, jxl::Image3<float>*)
Unexecuted instantiation: jxl::N_SSE2::SRGBToXYBAndLinear(float const*, jxl::ThreadPool*, jxl::Image3<float>*, jxl::Image3<float>*)
213
214
186
void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
215
186
  const HWY_FULL(float) d;
216
186
  const size_t N = Lanes(d);
217
186
  const float mul = intensity_target / 255.0f;
218
744
  for (size_t j = 0; j < 3; ++j) {
219
2.23k
    for (size_t i = 0; i < 3; ++i) {
220
1.67k
      const auto absorb = Set(d, jxl::cms::kOpsinAbsorbanceMatrix[j][i] * mul);
221
1.67k
      Store(absorb, d, premul_absorb + (j * 3 + i) * N);
222
1.67k
    }
223
558
  }
224
744
  for (size_t i = 0; i < 3; ++i) {
225
558
    const auto neg_bias_cbrt =
226
558
        Set(d, -cbrtf(jxl::cms::kOpsinAbsorbanceBias[i]));
227
558
    Store(neg_bias_cbrt, d, premul_absorb + (9 + i) * N);
228
558
  }
229
186
}
Unexecuted instantiation: jxl::N_SSE4::ComputePremulAbsorb(float, float*)
jxl::N_AVX2::ComputePremulAbsorb(float, float*)
Line
Count
Source
214
186
void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
215
186
  const HWY_FULL(float) d;
216
186
  const size_t N = Lanes(d);
217
186
  const float mul = intensity_target / 255.0f;
218
744
  for (size_t j = 0; j < 3; ++j) {
219
2.23k
    for (size_t i = 0; i < 3; ++i) {
220
1.67k
      const auto absorb = Set(d, jxl::cms::kOpsinAbsorbanceMatrix[j][i] * mul);
221
1.67k
      Store(absorb, d, premul_absorb + (j * 3 + i) * N);
222
1.67k
    }
223
558
  }
224
744
  for (size_t i = 0; i < 3; ++i) {
225
558
    const auto neg_bias_cbrt =
226
558
        Set(d, -cbrtf(jxl::cms::kOpsinAbsorbanceBias[i]));
227
558
    Store(neg_bias_cbrt, d, premul_absorb + (9 + i) * N);
228
558
  }
229
186
}
Unexecuted instantiation: jxl::N_SSE2::ComputePremulAbsorb(float, float*)
230
231
// This is different from Butteraugli's OpsinDynamicsImage() in the sense that
232
// it does not contain a sensitivity multiplier based on the blurred image.
233
Status ToXYB(const ColorEncoding& c_current, float intensity_target,
234
             const ImageF* black, ThreadPool* pool, Image3F* JXL_RESTRICT image,
235
186
             const JxlCmsInterface& cms, Image3F* const JXL_RESTRICT linear) {
236
186
  JXL_ENSURE(image);
237
186
  if (black) JXL_ENSURE(SameSize(*image, *black));
238
186
  if (linear) JXL_ENSURE(SameSize(*image, *linear));
239
240
186
  JxlMemoryManager* memory_manager = image->memory_manager();
241
186
  JXL_ENSURE(memory_manager);
242
243
186
  const HWY_FULL(float) d;
244
  // Pre-broadcasted constants
245
186
  JXL_ASSIGN_OR_RETURN(
246
186
      AlignedMemory mem,
247
186
      AlignedMemory::Create(memory_manager, Lanes(d) * 12 * sizeof(float)));
248
186
  float* premul_absorb = mem.address<float>();
249
186
  ComputePremulAbsorb(intensity_target, premul_absorb);
250
251
186
  const bool want_linear = (linear != nullptr);
252
253
186
  const ColorEncoding& c_linear_srgb =
254
186
      ColorEncoding::LinearSRGB(c_current.IsGray());
255
  // Linear sRGB inputs are rare but can be useful for the fastest encoders, for
256
  // which undoing the sRGB transfer function would be a large part of the cost.
257
186
  if (c_linear_srgb.SameColorEncoding(c_current)) {
258
    // This only happens if kitten or slower; moving ImageBundle might be
259
    // possible but the encoder is much slower than this copy.
260
0
    if (want_linear) {
261
0
      JXL_RETURN_IF_ERROR(CopyImageTo(*image, linear));
262
0
    }
263
0
    JXL_RETURN_IF_ERROR(LinearSRGBToXYB(premul_absorb, pool, image));
264
0
    return true;
265
0
  }
266
267
  // Common case: already sRGB, can avoid the color transform
268
186
  if (c_current.IsSRGB()) {
269
    // Common case: can avoid allocating/copying
270
186
    if (want_linear) {
271
      // Slow encoder also wants linear sRGB.
272
0
      JXL_RETURN_IF_ERROR(
273
0
          SRGBToXYBAndLinear(premul_absorb, pool, image, linear));
274
186
    } else {
275
186
      JXL_RETURN_IF_ERROR(SRGBToXYB(premul_absorb, pool, image));
276
186
    }
277
186
    return true;
278
186
  }
279
280
0
  JXL_RETURN_IF_ERROR(ApplyColorTransform(
281
0
      c_current, intensity_target, *image, black, Rect(*image), c_linear_srgb,
282
0
      cms, pool, want_linear ? linear : image));
283
0
  if (want_linear) {
284
0
    JXL_RETURN_IF_ERROR(CopyImageTo(*linear, image));
285
0
  }
286
0
  JXL_RETURN_IF_ERROR(LinearSRGBToXYB(premul_absorb, pool, image));
287
0
  return true;
288
0
}
Unexecuted instantiation: jxl::N_SSE4::ToXYB(jxl::ColorEncoding const&, float, jxl::Plane<float> const*, jxl::ThreadPool*, jxl::Image3<float>*, JxlCmsInterface const&, jxl::Image3<float>*)
jxl::N_AVX2::ToXYB(jxl::ColorEncoding const&, float, jxl::Plane<float> const*, jxl::ThreadPool*, jxl::Image3<float>*, JxlCmsInterface const&, jxl::Image3<float>*)
Line
Count
Source
235
186
             const JxlCmsInterface& cms, Image3F* const JXL_RESTRICT linear) {
236
186
  JXL_ENSURE(image);
237
186
  if (black) JXL_ENSURE(SameSize(*image, *black));
238
186
  if (linear) JXL_ENSURE(SameSize(*image, *linear));
239
240
186
  JxlMemoryManager* memory_manager = image->memory_manager();
241
186
  JXL_ENSURE(memory_manager);
242
243
186
  const HWY_FULL(float) d;
244
  // Pre-broadcasted constants
245
186
  JXL_ASSIGN_OR_RETURN(
246
186
      AlignedMemory mem,
247
186
      AlignedMemory::Create(memory_manager, Lanes(d) * 12 * sizeof(float)));
248
186
  float* premul_absorb = mem.address<float>();
249
186
  ComputePremulAbsorb(intensity_target, premul_absorb);
250
251
186
  const bool want_linear = (linear != nullptr);
252
253
186
  const ColorEncoding& c_linear_srgb =
254
186
      ColorEncoding::LinearSRGB(c_current.IsGray());
255
  // Linear sRGB inputs are rare but can be useful for the fastest encoders, for
256
  // which undoing the sRGB transfer function would be a large part of the cost.
257
186
  if (c_linear_srgb.SameColorEncoding(c_current)) {
258
    // This only happens if kitten or slower; moving ImageBundle might be
259
    // possible but the encoder is much slower than this copy.
260
0
    if (want_linear) {
261
0
      JXL_RETURN_IF_ERROR(CopyImageTo(*image, linear));
262
0
    }
263
0
    JXL_RETURN_IF_ERROR(LinearSRGBToXYB(premul_absorb, pool, image));
264
0
    return true;
265
0
  }
266
267
  // Common case: already sRGB, can avoid the color transform
268
186
  if (c_current.IsSRGB()) {
269
    // Common case: can avoid allocating/copying
270
186
    if (want_linear) {
271
      // Slow encoder also wants linear sRGB.
272
0
      JXL_RETURN_IF_ERROR(
273
0
          SRGBToXYBAndLinear(premul_absorb, pool, image, linear));
274
186
    } else {
275
186
      JXL_RETURN_IF_ERROR(SRGBToXYB(premul_absorb, pool, image));
276
186
    }
277
186
    return true;
278
186
  }
279
280
0
  JXL_RETURN_IF_ERROR(ApplyColorTransform(
281
0
      c_current, intensity_target, *image, black, Rect(*image), c_linear_srgb,
282
0
      cms, pool, want_linear ? linear : image));
283
0
  if (want_linear) {
284
0
    JXL_RETURN_IF_ERROR(CopyImageTo(*linear, image));
285
0
  }
286
0
  JXL_RETURN_IF_ERROR(LinearSRGBToXYB(premul_absorb, pool, image));
287
0
  return true;
288
0
}
Unexecuted instantiation: jxl::N_SSE2::ToXYB(jxl::ColorEncoding const&, float, jxl::Plane<float> const*, jxl::ThreadPool*, jxl::Image3<float>*, JxlCmsInterface const&, jxl::Image3<float>*)
289
290
// Transform RGB to YCbCr.
291
// Could be performed in-place (i.e. Y, Cb and Cr could alias R, G and B).
292
Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
293
                  const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
294
0
                  ImageF* cr_plane, ThreadPool* pool) {
295
0
  const HWY_FULL(float) df;
296
0
  const size_t S = Lanes(df);  // Step.
297
298
0
  const size_t xsize = r_plane.xsize();
299
0
  const size_t ysize = r_plane.ysize();
300
0
  if ((xsize == 0) || (ysize == 0)) return true;
301
302
  // Full-range BT.601 as defined by JFIF Clause 7:
303
  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
304
0
  const auto k128 = Set(df, 128.0f / 255);
305
0
  const auto kR = Set(df, 0.299f);  // NTSC luma
306
0
  const auto kG = Set(df, 0.587f);
307
0
  const auto kB = Set(df, 0.114f);
308
0
  const auto kAmpR = Set(df, 0.701f);
309
0
  const auto kAmpB = Set(df, 0.886f);
310
0
  const auto kDiffR = Add(kAmpR, kR);
311
0
  const auto kDiffB = Add(kAmpB, kB);
312
0
  const auto kNormR = Div(Set(df, 1.0f), (Add(kAmpR, Add(kG, kB))));
313
0
  const auto kNormB = Div(Set(df, 1.0f), (Add(kR, Add(kG, kAmpB))));
314
315
0
  constexpr size_t kGroupArea = kGroupDim * kGroupDim;
316
0
  const size_t lines_per_group = DivCeil(kGroupArea, xsize);
317
0
  const size_t num_stripes = DivCeil(ysize, lines_per_group);
318
0
  const auto transform = [&](int idx, int /* thread*/) -> Status {
319
0
    const size_t y0 = idx * lines_per_group;
320
0
    const size_t y1 = std::min<size_t>(y0 + lines_per_group, ysize);
321
0
    for (size_t y = y0; y < y1; ++y) {
322
0
      const float* r_row = r_plane.ConstRow(y);
323
0
      const float* g_row = g_plane.ConstRow(y);
324
0
      const float* b_row = b_plane.ConstRow(y);
325
0
      float* y_row = y_plane->Row(y);
326
0
      float* cb_row = cb_plane->Row(y);
327
0
      float* cr_row = cr_plane->Row(y);
328
0
      for (size_t x = 0; x < xsize; x += S) {
329
0
        const auto r = Load(df, r_row + x);
330
0
        const auto g = Load(df, g_row + x);
331
0
        const auto b = Load(df, b_row + x);
332
0
        const auto r_base = Mul(r, kR);
333
0
        const auto r_diff = Mul(r, kDiffR);
334
0
        const auto g_base = Mul(g, kG);
335
0
        const auto b_base = Mul(b, kB);
336
0
        const auto b_diff = Mul(b, kDiffB);
337
0
        const auto y_base = Add(r_base, Add(g_base, b_base));
338
0
        const auto y_vec = Sub(y_base, k128);
339
0
        const auto cb_vec = Mul(Sub(b_diff, y_base), kNormB);
340
0
        const auto cr_vec = Mul(Sub(r_diff, y_base), kNormR);
341
0
        Store(y_vec, df, y_row + x);
342
0
        Store(cb_vec, df, cb_row + x);
343
0
        Store(cr_vec, df, cr_row + x);
344
0
      }
345
0
    }
346
0
    return true;
347
0
  };
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE4::RgbToYcbcr(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::Plane<float>*, jxl::ThreadPool*)::$_0::operator()(int, int) const
Unexecuted instantiation: enc_xyb.cc:jxl::N_AVX2::RgbToYcbcr(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::Plane<float>*, jxl::ThreadPool*)::$_0::operator()(int, int) const
Unexecuted instantiation: enc_xyb.cc:jxl::N_SSE2::RgbToYcbcr(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::Plane<float>*, jxl::ThreadPool*)::$_0::operator()(int, int) const
348
0
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<int>(num_stripes),
349
0
                                ThreadPool::NoInit, transform, "RgbToYcbCr"));
350
0
  return true;
351
0
}
Unexecuted instantiation: jxl::N_SSE4::RgbToYcbcr(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::Plane<float>*, jxl::ThreadPool*)
Unexecuted instantiation: jxl::N_AVX2::RgbToYcbcr(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::Plane<float>*, jxl::ThreadPool*)
Unexecuted instantiation: jxl::N_SSE2::RgbToYcbcr(jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float> const&, jxl::Plane<float>*, jxl::Plane<float>*, jxl::Plane<float>*, jxl::ThreadPool*)
352
353
// NOLINTNEXTLINE(google-readability-namespace-comments)
354
}  // namespace HWY_NAMESPACE
355
}  // namespace jxl
356
HWY_AFTER_NAMESPACE();
357
358
#if HWY_ONCE
359
namespace jxl {
360
HWY_EXPORT(ToXYB);
361
Status ToXYB(const ColorEncoding& c_current, float intensity_target,
362
             const ImageF* black, ThreadPool* pool, Image3F* JXL_RESTRICT image,
363
186
             const JxlCmsInterface& cms, Image3F* const JXL_RESTRICT linear) {
364
186
  return HWY_DYNAMIC_DISPATCH(ToXYB)(c_current, intensity_target, black, pool,
365
186
                                     image, cms, linear);
366
186
}
367
368
HWY_EXPORT(LinearRGBRowToXYB);
369
void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
370
                       float* JXL_RESTRICT row2,
371
0
                       const float* JXL_RESTRICT premul_absorb, size_t xsize) {
372
0
  HWY_DYNAMIC_DISPATCH(LinearRGBRowToXYB)
373
0
  (row0, row1, row2, premul_absorb, xsize);
374
0
}
375
376
HWY_EXPORT(ComputePremulAbsorb);
377
0
void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
378
0
  HWY_DYNAMIC_DISPATCH(ComputePremulAbsorb)(intensity_target, premul_absorb);
379
0
}
380
381
void ScaleXYBRow(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
382
0
                 float* JXL_RESTRICT row2, size_t xsize) {
383
0
  for (size_t x = 0; x < xsize; x++) {
384
0
    row2[x] = (row2[x] - row1[x] + jxl::cms::kScaledXYBOffset[2]) *
385
0
              jxl::cms::kScaledXYBScale[2];
386
0
    row0[x] = (row0[x] + jxl::cms::kScaledXYBOffset[0]) *
387
0
              jxl::cms::kScaledXYBScale[0];
388
0
    row1[x] = (row1[x] + jxl::cms::kScaledXYBOffset[1]) *
389
0
              jxl::cms::kScaledXYBScale[1];
390
0
  }
391
0
}
392
393
0
void ScaleXYB(Image3F* opsin) {
394
0
  for (size_t y = 0; y < opsin->ysize(); y++) {
395
0
    float* row0 = opsin->PlaneRow(0, y);
396
0
    float* row1 = opsin->PlaneRow(1, y);
397
0
    float* row2 = opsin->PlaneRow(2, y);
398
0
    ScaleXYBRow(row0, row1, row2, opsin->xsize());
399
0
  }
400
0
}
401
402
HWY_EXPORT(RgbToYcbcr);
403
Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
404
                  const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
405
0
                  ImageF* cr_plane, ThreadPool* pool) {
406
0
  return HWY_DYNAMIC_DISPATCH(RgbToYcbcr)(r_plane, g_plane, b_plane, y_plane,
407
0
                                          cb_plane, cr_plane, pool);
408
0
}
409
410
}  // namespace jxl
411
#endif  // HWY_ONCE