Coverage Report

Created: 2025-06-22 08:04

/src/libjxl/lib/jxl/enc_xyb.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_xyb.h"
7
8
#include <jxl/memory_manager.h>
9
10
#include <algorithm>
11
#include <cstdlib>
12
13
#undef HWY_TARGET_INCLUDE
14
#define HWY_TARGET_INCLUDE "lib/jxl/enc_xyb.cc"
15
#include <hwy/foreach_target.h>
16
#include <hwy/highway.h>
17
18
#include "lib/jxl/base/compiler_specific.h"
19
#include "lib/jxl/base/data_parallel.h"
20
#include "lib/jxl/base/fast_math-inl.h"
21
#include "lib/jxl/base/rect.h"
22
#include "lib/jxl/base/status.h"
23
#include "lib/jxl/cms/opsin_params.h"
24
#include "lib/jxl/cms/transfer_functions-inl.h"
25
#include "lib/jxl/color_encoding_internal.h"
26
#include "lib/jxl/enc_image_bundle.h"
27
#include "lib/jxl/image_bundle.h"
28
#include "lib/jxl/image_ops.h"
29
30
HWY_BEFORE_NAMESPACE();
31
namespace jxl {
32
namespace HWY_NAMESPACE {
33
34
// These templates are not found via ADL.
35
using hwy::HWY_NAMESPACE::Add;
36
using hwy::HWY_NAMESPACE::Mul;
37
using hwy::HWY_NAMESPACE::MulAdd;
38
using hwy::HWY_NAMESPACE::Sub;
39
using hwy::HWY_NAMESPACE::ZeroIfNegative;
40
41
// 4x3 matrix * 3x1 SIMD vectors
42
template <class V>
43
JXL_INLINE void OpsinAbsorbance(const V r, const V g, const V b,
44
                                const float* JXL_RESTRICT premul_absorb,
45
                                V* JXL_RESTRICT mixed0, V* JXL_RESTRICT mixed1,
46
0
                                V* JXL_RESTRICT mixed2) {
47
0
  const float* bias = jxl::cms::kOpsinAbsorbanceBias.data();
48
0
  const HWY_FULL(float) d;
49
0
  const size_t N = Lanes(d);
50
0
  const auto m0 = Load(d, premul_absorb + 0 * N);
51
0
  const auto m1 = Load(d, premul_absorb + 1 * N);
52
0
  const auto m2 = Load(d, premul_absorb + 2 * N);
53
0
  const auto m3 = Load(d, premul_absorb + 3 * N);
54
0
  const auto m4 = Load(d, premul_absorb + 4 * N);
55
0
  const auto m5 = Load(d, premul_absorb + 5 * N);
56
0
  const auto m6 = Load(d, premul_absorb + 6 * N);
57
0
  const auto m7 = Load(d, premul_absorb + 7 * N);
58
0
  const auto m8 = Load(d, premul_absorb + 8 * N);
59
0
  *mixed0 = MulAdd(m0, r, MulAdd(m1, g, MulAdd(m2, b, Set(d, bias[0]))));
60
0
  *mixed1 = MulAdd(m3, r, MulAdd(m4, g, MulAdd(m5, b, Set(d, bias[1]))));
61
0
  *mixed2 = MulAdd(m6, r, MulAdd(m7, g, MulAdd(m8, b, Set(d, bias[2]))));
62
0
}
63
64
template <class V>
65
void StoreXYB(const V r, V g, const V b, float* JXL_RESTRICT valx,
66
0
              float* JXL_RESTRICT valy, float* JXL_RESTRICT valz) {
67
0
  const HWY_FULL(float) d;
68
0
  const V half = Set(d, 0.5f);
69
0
  Store(Mul(half, Sub(r, g)), d, valx);
70
0
  Store(Mul(half, Add(r, g)), d, valy);
71
0
  Store(b, d, valz);
72
0
}
73
74
// Converts one RGB vector to XYB.
75
template <class V>
76
void LinearRGBToXYB(const V r, const V g, const V b,
77
                    const float* JXL_RESTRICT premul_absorb,
78
                    float* JXL_RESTRICT valx, float* JXL_RESTRICT valy,
79
0
                    float* JXL_RESTRICT valz) {
80
0
  V mixed0;
81
0
  V mixed1;
82
0
  V mixed2;
83
0
  OpsinAbsorbance(r, g, b, premul_absorb, &mixed0, &mixed1, &mixed2);
84
85
  // mixed* should be non-negative even for wide-gamut, so clamp to zero.
86
0
  mixed0 = ZeroIfNegative(mixed0);
87
0
  mixed1 = ZeroIfNegative(mixed1);
88
0
  mixed2 = ZeroIfNegative(mixed2);
89
90
0
  const HWY_FULL(float) d;
91
0
  const size_t N = Lanes(d);
92
0
  mixed0 = CubeRootAndAdd(mixed0, Load(d, premul_absorb + 9 * N));
93
0
  mixed1 = CubeRootAndAdd(mixed1, Load(d, premul_absorb + 10 * N));
94
0
  mixed2 = CubeRootAndAdd(mixed2, Load(d, premul_absorb + 11 * N));
95
0
  StoreXYB(mixed0, mixed1, mixed2, valx, valy, valz);
96
97
  // For wide-gamut inputs, r/g/b and valx (but not y/z) are often negative.
98
0
}
99
100
void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
101
                       float* JXL_RESTRICT row2,
102
0
                       const float* JXL_RESTRICT premul_absorb, size_t xsize) {
103
0
  const HWY_FULL(float) d;
104
0
  for (size_t x = 0; x < xsize; x += Lanes(d)) {
105
0
    const auto r = Load(d, row0 + x);
106
0
    const auto g = Load(d, row1 + x);
107
0
    const auto b = Load(d, row2 + x);
108
0
    LinearRGBToXYB(r, g, b, premul_absorb, row0 + x, row1 + x, row2 + x);
109
0
  }
110
0
}
111
112
// Input/output uses the codec.h scaling: nominally 0-1 if in-gamut.
113
template <class V>
114
0
V LinearFromSRGB(V encoded) {
115
0
  return TF_SRGB().DisplayFromEncoded(encoded);
116
0
}
117
118
Status LinearSRGBToXYB(const float* JXL_RESTRICT premul_absorb,
119
0
                       ThreadPool* pool, Image3F* JXL_RESTRICT image) {
120
0
  const size_t xsize = image->xsize();
121
122
0
  const HWY_FULL(float) d;
123
0
  const auto process_row = [&](const uint32_t task,
124
0
                               size_t /*thread*/) -> Status {
125
0
    const size_t y = static_cast<size_t>(task);
126
0
    float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
127
0
    float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
128
0
    float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
129
130
0
    for (size_t x = 0; x < xsize; x += Lanes(d)) {
131
0
      const auto in_r = Load(d, row0 + x);
132
0
      const auto in_g = Load(d, row1 + x);
133
0
      const auto in_b = Load(d, row2 + x);
134
0
      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
135
0
                     row2 + x);
136
0
    }
137
0
    return true;
138
0
  };
139
0
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
140
0
                                ThreadPool::NoInit, process_row,
141
0
                                "LinearToXYB"));
142
0
  return true;
143
0
}
144
145
Status SRGBToXYB(const float* JXL_RESTRICT premul_absorb, ThreadPool* pool,
146
0
                 Image3F* JXL_RESTRICT image) {
147
0
  const size_t xsize = image->xsize();
148
149
0
  const HWY_FULL(float) d;
150
0
  const auto process_row = [&](const uint32_t task,
151
0
                               size_t /*thread*/) -> Status {
152
0
    const size_t y = static_cast<size_t>(task);
153
0
    float* JXL_RESTRICT row0 = image->PlaneRow(0, y);
154
0
    float* JXL_RESTRICT row1 = image->PlaneRow(1, y);
155
0
    float* JXL_RESTRICT row2 = image->PlaneRow(2, y);
156
157
0
    for (size_t x = 0; x < xsize; x += Lanes(d)) {
158
0
      const auto in_r = LinearFromSRGB(Load(d, row0 + x));
159
0
      const auto in_g = LinearFromSRGB(Load(d, row1 + x));
160
0
      const auto in_b = LinearFromSRGB(Load(d, row2 + x));
161
0
      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row0 + x, row1 + x,
162
0
                     row2 + x);
163
0
    }
164
0
    return true;
165
0
  };
166
0
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
167
0
                                ThreadPool::NoInit, process_row, "SRGBToXYB"));
168
0
  return true;
169
0
}
170
171
Status SRGBToXYBAndLinear(const float* JXL_RESTRICT premul_absorb,
172
                          ThreadPool* pool, Image3F* JXL_RESTRICT image,
173
0
                          Image3F* JXL_RESTRICT linear) {
174
0
  const size_t xsize = image->xsize();
175
176
0
  const HWY_FULL(float) d;
177
0
  const auto process_row = [&](const uint32_t task,
178
0
                               size_t /*thread*/) -> Status {
179
0
    const size_t y = static_cast<size_t>(task);
180
0
    float* JXL_RESTRICT row_image0 = image->PlaneRow(0, y);
181
0
    float* JXL_RESTRICT row_image1 = image->PlaneRow(1, y);
182
0
    float* JXL_RESTRICT row_image2 = image->PlaneRow(2, y);
183
0
    float* JXL_RESTRICT row_linear0 = linear->PlaneRow(0, y);
184
0
    float* JXL_RESTRICT row_linear1 = linear->PlaneRow(1, y);
185
0
    float* JXL_RESTRICT row_linear2 = linear->PlaneRow(2, y);
186
187
0
    for (size_t x = 0; x < xsize; x += Lanes(d)) {
188
0
      const auto in_r = LinearFromSRGB(Load(d, row_image0 + x));
189
0
      const auto in_g = LinearFromSRGB(Load(d, row_image1 + x));
190
0
      const auto in_b = LinearFromSRGB(Load(d, row_image2 + x));
191
192
0
      Store(in_r, d, row_linear0 + x);
193
0
      Store(in_g, d, row_linear1 + x);
194
0
      Store(in_b, d, row_linear2 + x);
195
196
0
      LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_image0 + x,
197
0
                     row_image1 + x, row_image2 + x);
198
0
    }
199
0
    return true;
200
0
  };
201
0
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(image->ysize()),
202
0
                                ThreadPool::NoInit, process_row,
203
0
                                "SRGBToXYBAndLinear"));
204
0
  return true;
205
0
}
206
207
0
void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
208
0
  const HWY_FULL(float) d;
209
0
  const size_t N = Lanes(d);
210
0
  const float mul = intensity_target / 255.0f;
211
0
  for (size_t j = 0; j < 3; ++j) {
212
0
    for (size_t i = 0; i < 3; ++i) {
213
0
      const auto absorb = Set(d, jxl::cms::kOpsinAbsorbanceMatrix[j][i] * mul);
214
0
      Store(absorb, d, premul_absorb + (j * 3 + i) * N);
215
0
    }
216
0
  }
217
0
  for (size_t i = 0; i < 3; ++i) {
218
0
    const auto neg_bias_cbrt =
219
0
        Set(d, -cbrtf(jxl::cms::kOpsinAbsorbanceBias[i]));
220
0
    Store(neg_bias_cbrt, d, premul_absorb + (9 + i) * N);
221
0
  }
222
0
}
223
224
// This is different from Butteraugli's OpsinDynamicsImage() in the sense that
225
// it does not contain a sensitivity multiplier based on the blurred image.
226
Status ToXYB(const ColorEncoding& c_current, float intensity_target,
227
             const ImageF* black, ThreadPool* pool, Image3F* JXL_RESTRICT image,
228
0
             const JxlCmsInterface& cms, Image3F* const JXL_RESTRICT linear) {
229
0
  if (black) JXL_ENSURE(SameSize(*image, *black));
230
0
  if (linear) JXL_ENSURE(SameSize(*image, *linear));
231
232
0
  const HWY_FULL(float) d;
233
  // Pre-broadcasted constants
234
0
  HWY_ALIGN float premul_absorb[MaxLanes(d) * 12];
235
0
  ComputePremulAbsorb(intensity_target, premul_absorb);
236
237
0
  const bool want_linear = linear != nullptr;
238
239
0
  const ColorEncoding& c_linear_srgb =
240
0
      ColorEncoding::LinearSRGB(c_current.IsGray());
241
  // Linear sRGB inputs are rare but can be useful for the fastest encoders, for
242
  // which undoing the sRGB transfer function would be a large part of the cost.
243
0
  if (c_linear_srgb.SameColorEncoding(c_current)) {
244
    // This only happens if kitten or slower, moving ImageBundle might be
245
    // possible but the encoder is much slower than this copy.
246
0
    if (want_linear) {
247
0
      JXL_RETURN_IF_ERROR(CopyImageTo(*image, linear));
248
0
    }
249
0
    JXL_RETURN_IF_ERROR(LinearSRGBToXYB(premul_absorb, pool, image));
250
0
    return true;
251
0
  }
252
253
  // Common case: already sRGB, can avoid the color transform
254
0
  if (c_current.IsSRGB()) {
255
    // Common case: can avoid allocating/copying
256
0
    if (want_linear) {
257
      // Slow encoder also wants linear sRGB.
258
0
      JXL_RETURN_IF_ERROR(
259
0
          SRGBToXYBAndLinear(premul_absorb, pool, image, linear));
260
0
    } else {
261
0
      JXL_RETURN_IF_ERROR(SRGBToXYB(premul_absorb, pool, image));
262
0
    }
263
0
    return true;
264
0
  }
265
266
0
  JXL_RETURN_IF_ERROR(ApplyColorTransform(
267
0
      c_current, intensity_target, *image, black, Rect(*image), c_linear_srgb,
268
0
      cms, pool, want_linear ? linear : image));
269
0
  if (want_linear) {
270
0
    JXL_RETURN_IF_ERROR(CopyImageTo(*linear, image));
271
0
  }
272
0
  JXL_RETURN_IF_ERROR(LinearSRGBToXYB(premul_absorb, pool, image));
273
0
  return true;
274
0
}
275
276
// Transform RGB to YCbCr.
277
// Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B).
278
Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
279
                  const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
280
0
                  ImageF* cr_plane, ThreadPool* pool) {
281
0
  const HWY_FULL(float) df;
282
0
  const size_t S = Lanes(df);  // Step.
283
284
0
  const size_t xsize = r_plane.xsize();
285
0
  const size_t ysize = r_plane.ysize();
286
0
  if ((xsize == 0) || (ysize == 0)) return true;
287
288
  // Full-range BT.601 as defined by JFIF Clause 7:
289
  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
290
0
  const auto k128 = Set(df, 128.0f / 255);
291
0
  const auto kR = Set(df, 0.299f);  // NTSC luma
292
0
  const auto kG = Set(df, 0.587f);
293
0
  const auto kB = Set(df, 0.114f);
294
0
  const auto kAmpR = Set(df, 0.701f);
295
0
  const auto kAmpB = Set(df, 0.886f);
296
0
  const auto kDiffR = Add(kAmpR, kR);
297
0
  const auto kDiffB = Add(kAmpB, kB);
298
0
  const auto kNormR = Div(Set(df, 1.0f), (Add(kAmpR, Add(kG, kB))));
299
0
  const auto kNormB = Div(Set(df, 1.0f), (Add(kR, Add(kG, kAmpB))));
300
301
0
  constexpr size_t kGroupArea = kGroupDim * kGroupDim;
302
0
  const size_t lines_per_group = DivCeil(kGroupArea, xsize);
303
0
  const size_t num_stripes = DivCeil(ysize, lines_per_group);
304
0
  const auto transform = [&](int idx, int /* thread*/) -> Status {
305
0
    const size_t y0 = idx * lines_per_group;
306
0
    const size_t y1 = std::min<size_t>(y0 + lines_per_group, ysize);
307
0
    for (size_t y = y0; y < y1; ++y) {
308
0
      const float* r_row = r_plane.ConstRow(y);
309
0
      const float* g_row = g_plane.ConstRow(y);
310
0
      const float* b_row = b_plane.ConstRow(y);
311
0
      float* y_row = y_plane->Row(y);
312
0
      float* cb_row = cb_plane->Row(y);
313
0
      float* cr_row = cr_plane->Row(y);
314
0
      for (size_t x = 0; x < xsize; x += S) {
315
0
        const auto r = Load(df, r_row + x);
316
0
        const auto g = Load(df, g_row + x);
317
0
        const auto b = Load(df, b_row + x);
318
0
        const auto r_base = Mul(r, kR);
319
0
        const auto r_diff = Mul(r, kDiffR);
320
0
        const auto g_base = Mul(g, kG);
321
0
        const auto b_base = Mul(b, kB);
322
0
        const auto b_diff = Mul(b, kDiffB);
323
0
        const auto y_base = Add(r_base, Add(g_base, b_base));
324
0
        const auto y_vec = Sub(y_base, k128);
325
0
        const auto cb_vec = Mul(Sub(b_diff, y_base), kNormB);
326
0
        const auto cr_vec = Mul(Sub(r_diff, y_base), kNormR);
327
0
        Store(y_vec, df, y_row + x);
328
0
        Store(cb_vec, df, cb_row + x);
329
0
        Store(cr_vec, df, cr_row + x);
330
0
      }
331
0
    }
332
0
    return true;
333
0
  };
334
0
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<int>(num_stripes),
335
0
                                ThreadPool::NoInit, transform, "RgbToYcbCr"));
336
0
  return true;
337
0
}
338
339
// NOLINTNEXTLINE(google-readability-namespace-comments)
340
}  // namespace HWY_NAMESPACE
341
}  // namespace jxl
342
HWY_AFTER_NAMESPACE();
343
344
#if HWY_ONCE
345
namespace jxl {
346
HWY_EXPORT(ToXYB);
347
Status ToXYB(const ColorEncoding& c_current, float intensity_target,
348
             const ImageF* black, ThreadPool* pool, Image3F* JXL_RESTRICT image,
349
0
             const JxlCmsInterface& cms, Image3F* const JXL_RESTRICT linear) {
350
0
  return HWY_DYNAMIC_DISPATCH(ToXYB)(c_current, intensity_target, black, pool,
351
0
                                     image, cms, linear);
352
0
}
353
354
Status ToXYB(const ImageBundle& in, ThreadPool* pool, Image3F* JXL_RESTRICT xyb,
355
0
             const JxlCmsInterface& cms, Image3F* JXL_RESTRICT linear) {
356
0
  JxlMemoryManager* memory_manager = in.memory_manager();
357
0
  JXL_ASSIGN_OR_RETURN(*xyb,
358
0
                       Image3F::Create(memory_manager, in.xsize(), in.ysize()));
359
0
  JXL_RETURN_IF_ERROR(CopyImageTo(in.color(), xyb));
360
0
  JXL_RETURN_IF_ERROR(ToXYB(in.c_current(), in.metadata()->IntensityTarget(),
361
0
                            in.black(), pool, xyb, cms, linear));
362
0
  return true;
363
0
}
364
365
HWY_EXPORT(LinearRGBRowToXYB);
366
void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
367
                       float* JXL_RESTRICT row2,
368
0
                       const float* JXL_RESTRICT premul_absorb, size_t xsize) {
369
0
  HWY_DYNAMIC_DISPATCH(LinearRGBRowToXYB)
370
0
  (row0, row1, row2, premul_absorb, xsize);
371
0
}
372
373
HWY_EXPORT(ComputePremulAbsorb);
374
0
void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
375
0
  HWY_DYNAMIC_DISPATCH(ComputePremulAbsorb)(intensity_target, premul_absorb);
376
0
}
377
378
void ScaleXYBRow(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
379
0
                 float* JXL_RESTRICT row2, size_t xsize) {
380
0
  for (size_t x = 0; x < xsize; x++) {
381
0
    row2[x] = (row2[x] - row1[x] + jxl::cms::kScaledXYBOffset[2]) *
382
0
              jxl::cms::kScaledXYBScale[2];
383
0
    row0[x] = (row0[x] + jxl::cms::kScaledXYBOffset[0]) *
384
0
              jxl::cms::kScaledXYBScale[0];
385
0
    row1[x] = (row1[x] + jxl::cms::kScaledXYBOffset[1]) *
386
0
              jxl::cms::kScaledXYBScale[1];
387
0
  }
388
0
}
389
390
0
void ScaleXYB(Image3F* opsin) {
391
0
  for (size_t y = 0; y < opsin->ysize(); y++) {
392
0
    float* row0 = opsin->PlaneRow(0, y);
393
0
    float* row1 = opsin->PlaneRow(1, y);
394
0
    float* row2 = opsin->PlaneRow(2, y);
395
0
    ScaleXYBRow(row0, row1, row2, opsin->xsize());
396
0
  }
397
0
}
398
399
HWY_EXPORT(RgbToYcbcr);
400
Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
401
                  const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
402
0
                  ImageF* cr_plane, ThreadPool* pool) {
403
0
  return HWY_DYNAMIC_DISPATCH(RgbToYcbcr)(r_plane, g_plane, b_plane, y_plane,
404
0
                                          cb_plane, cr_plane, pool);
405
0
}
406
407
}  // namespace jxl
408
#endif  // HWY_ONCE