Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_external_image.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/dec_external_image.h"
7
8
#include <jxl/memory_manager.h>
9
#include <jxl/types.h>
10
11
#include <algorithm>
12
#include <cstdint>
13
#include <cstring>
14
#include <memory>
15
#include <utility>
16
#include <vector>
17
18
#include "lib/jxl/base/data_parallel.h"
19
#include "lib/jxl/base/status.h"
20
#include "lib/jxl/dec_cache.h"
21
#include "lib/jxl/image.h"
22
#include "lib/jxl/image_bundle.h"
23
#include "lib/jxl/image_metadata.h"
24
#include "lib/jxl/image_ops.h"
25
26
#undef HWY_TARGET_INCLUDE
27
#define HWY_TARGET_INCLUDE "lib/jxl/dec_external_image.cc"
28
#include <hwy/foreach_target.h>
29
#include <hwy/highway.h>
30
31
#include "lib/jxl/alpha.h"
32
#include "lib/jxl/base/byte_order.h"
33
#include "lib/jxl/base/common.h"
34
#include "lib/jxl/base/compiler_specific.h"
35
#include "lib/jxl/base/printf_macros.h"
36
#include "lib/jxl/base/sanitizers.h"
37
#include "lib/jxl/base/span.h"
38
39
HWY_BEFORE_NAMESPACE();
40
namespace jxl {
41
namespace HWY_NAMESPACE {
42
43
// These templates are not found via ADL.
44
using hwy::HWY_NAMESPACE::Clamp;
45
using hwy::HWY_NAMESPACE::Mul;
46
using hwy::HWY_NAMESPACE::NearestInt;
47
48
// TODO(jon): check if this can be replaced by a FloatToU16 function
49
void FloatToU32(const float* in, uint32_t* out, size_t num, float mul,
50
0
                size_t bits_per_sample) {
51
0
  const HWY_FULL(float) d;
52
0
  const hwy::HWY_NAMESPACE::Rebind<uint32_t, decltype(d)> du;
53
54
  // Unpoison accessing partially-uninitialized vectors with memory sanitizer.
55
  // This is because we run NearestInt() on the vector, which triggers MSAN even
56
  // though it is safe to do so since the values are not mixed between lanes.
57
0
  const size_t num_round_up = RoundUpTo(num, Lanes(d));
58
0
  msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num));
59
60
0
  const auto one = Set(d, 1.0f);
61
0
  const auto scale = Set(d, mul);
62
0
  for (size_t x = 0; x < num; x += Lanes(d)) {
63
0
    auto v = Load(d, in + x);
64
    // Clamp turns NaN to 'min'.
65
0
    v = Clamp(v, Zero(d), one);
66
0
    auto i = NearestInt(Mul(v, scale));
67
0
    Store(BitCast(du, i), du, out + x);
68
0
  }
69
70
  // Poison back the output.
71
0
  msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num));
72
0
}
Unexecuted instantiation: jxl::N_SSE4::FloatToU32(float const*, unsigned int*, unsigned long, float, unsigned long)
Unexecuted instantiation: jxl::N_AVX2::FloatToU32(float const*, unsigned int*, unsigned long, float, unsigned long)
Unexecuted instantiation: jxl::N_AVX3::FloatToU32(float const*, unsigned int*, unsigned long, float, unsigned long)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::FloatToU32(float const*, unsigned int*, unsigned long, float, unsigned long)
Unexecuted instantiation: jxl::N_AVX3_SPR::FloatToU32(float const*, unsigned int*, unsigned long, float, unsigned long)
Unexecuted instantiation: jxl::N_SSE2::FloatToU32(float const*, unsigned int*, unsigned long, float, unsigned long)
73
74
0
void FloatToF16(const float* in, hwy::float16_t* out, size_t num) {
75
0
  const HWY_FULL(float) d;
76
0
  const hwy::HWY_NAMESPACE::Rebind<hwy::float16_t, decltype(d)> du;
77
78
  // Unpoison accessing partially-uninitialized vectors with memory sanitizer.
79
  // This is because we run DemoteTo() on the vector which triggers msan.
80
0
  const size_t num_round_up = RoundUpTo(num, Lanes(d));
81
0
  msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num));
82
83
0
  for (size_t x = 0; x < num; x += Lanes(d)) {
84
0
    auto v = Load(d, in + x);
85
0
    auto v16 = DemoteTo(du, v);
86
0
    Store(v16, du, out + x);
87
0
  }
88
89
  // Poison back the output.
90
0
  msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num));
91
0
}
Unexecuted instantiation: jxl::N_SSE4::FloatToF16(float const*, hwy::float16_t*, unsigned long)
Unexecuted instantiation: jxl::N_AVX2::FloatToF16(float const*, hwy::float16_t*, unsigned long)
Unexecuted instantiation: jxl::N_AVX3::FloatToF16(float const*, hwy::float16_t*, unsigned long)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::FloatToF16(float const*, hwy::float16_t*, unsigned long)
Unexecuted instantiation: jxl::N_AVX3_SPR::FloatToF16(float const*, hwy::float16_t*, unsigned long)
Unexecuted instantiation: jxl::N_SSE2::FloatToF16(float const*, hwy::float16_t*, unsigned long)
92
93
// NOLINTNEXTLINE(google-readability-namespace-comments)
94
}  // namespace HWY_NAMESPACE
95
}  // namespace jxl
96
HWY_AFTER_NAMESPACE();
97
98
#if HWY_ONCE
99
100
namespace jxl {
101
namespace {
102
103
// Stores a float in big endian
104
0
void StoreBEFloat(float value, uint8_t* p) {
105
0
  uint32_t u;
106
0
  memcpy(&u, &value, 4);
107
0
  StoreBE32(u, p);
108
0
}
109
110
// Stores a float in little endian
111
26.0M
void StoreLEFloat(float value, uint8_t* p) {
112
26.0M
  uint32_t u;
113
26.0M
  memcpy(&u, &value, 4);
114
26.0M
  StoreLE32(u, p);
115
26.0M
}
116
117
// The orientation may not be identity.
118
// TODO(lode): SIMDify where possible
119
template <typename T>
120
Status UndoOrientation(jxl::Orientation undo_orientation, const Plane<T>& image,
121
0
                       Plane<T>& out, jxl::ThreadPool* pool) {
122
0
  const size_t xsize = image.xsize();
123
0
  const size_t ysize = image.ysize();
124
0
  JxlMemoryManager* memory_manager = image.memory_manager();
125
126
0
  if (undo_orientation == Orientation::kFlipHorizontal) {
127
0
    JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, xsize, ysize));
128
0
    const auto process_row = [&](const uint32_t task,
129
0
                                 size_t /*thread*/) -> Status {
130
0
      const int64_t y = task;
131
0
      const T* JXL_RESTRICT row_in = image.Row(y);
132
0
      T* JXL_RESTRICT row_out = out.Row(y);
133
0
      for (size_t x = 0; x < xsize; ++x) {
134
0
        row_out[xsize - x - 1] = row_in[x];
135
0
      }
136
0
      return true;
137
0
    };
138
0
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
139
0
                                  ThreadPool::NoInit, process_row,
140
0
                                  "UndoOrientation"));
141
0
  } else if (undo_orientation == Orientation::kRotate180) {
142
0
    JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, xsize, ysize));
143
0
    const auto process_row = [&](const uint32_t task,
144
0
                                 size_t /*thread*/) -> Status {
145
0
      const int64_t y = task;
146
0
      const T* JXL_RESTRICT row_in = image.Row(y);
147
0
      T* JXL_RESTRICT row_out = out.Row(ysize - y - 1);
148
0
      for (size_t x = 0; x < xsize; ++x) {
149
0
        row_out[xsize - x - 1] = row_in[x];
150
0
      }
151
0
      return true;
152
0
    };
153
0
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
154
0
                                  ThreadPool::NoInit, process_row,
155
0
                                  "UndoOrientation"));
156
0
  } else if (undo_orientation == Orientation::kFlipVertical) {
157
0
    JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, xsize, ysize));
158
0
    const auto process_row = [&](const uint32_t task,
159
0
                                 size_t /*thread*/) -> Status {
160
0
      const int64_t y = task;
161
0
      const T* JXL_RESTRICT row_in = image.Row(y);
162
0
      T* JXL_RESTRICT row_out = out.Row(ysize - y - 1);
163
0
      for (size_t x = 0; x < xsize; ++x) {
164
0
        row_out[x] = row_in[x];
165
0
      }
166
0
      return true;
167
0
    };
168
0
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
169
0
                                  ThreadPool::NoInit, process_row,
170
0
                                  "UndoOrientation"));
171
0
  } else if (undo_orientation == Orientation::kTranspose) {
172
0
    JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize));
173
0
    const auto process_row = [&](const uint32_t task,
174
0
                                 size_t /*thread*/) -> Status {
175
0
      const int64_t y = task;
176
0
      const T* JXL_RESTRICT row_in = image.Row(y);
177
0
      for (size_t x = 0; x < xsize; ++x) {
178
0
        out.Row(x)[y] = row_in[x];
179
0
      }
180
0
      return true;
181
0
    };
182
0
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
183
0
                                  ThreadPool::NoInit, process_row,
184
0
                                  "UndoOrientation"));
185
0
  } else if (undo_orientation == Orientation::kRotate90) {
186
0
    JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize));
187
0
    const auto process_row = [&](const uint32_t task,
188
0
                                 size_t /*thread*/) -> Status {
189
0
      const int64_t y = task;
190
0
      const T* JXL_RESTRICT row_in = image.Row(y);
191
0
      for (size_t x = 0; x < xsize; ++x) {
192
0
        out.Row(x)[ysize - y - 1] = row_in[x];
193
0
      }
194
0
      return true;
195
0
    };
196
0
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
197
0
                                  ThreadPool::NoInit, process_row,
198
0
                                  "UndoOrientation"));
199
0
  } else if (undo_orientation == Orientation::kAntiTranspose) {
200
0
    JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize));
201
0
    const auto process_row = [&](const uint32_t task,
202
0
                                 size_t /*thread*/) -> Status {
203
0
      const int64_t y = task;
204
0
      const T* JXL_RESTRICT row_in = image.Row(y);
205
0
      for (size_t x = 0; x < xsize; ++x) {
206
0
        out.Row(xsize - x - 1)[ysize - y - 1] = row_in[x];
207
0
      }
208
0
      return true;
209
0
    };
210
0
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
211
0
                                  ThreadPool::NoInit, process_row,
212
0
                                  "UndoOrientation"));
213
0
  } else if (undo_orientation == Orientation::kRotate270) {
214
0
    JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize));
215
0
    const auto process_row = [&](const uint32_t task,
216
0
                                 size_t /*thread*/) -> Status {
217
0
      const int64_t y = task;
218
0
      const T* JXL_RESTRICT row_in = image.Row(y);
219
0
      for (size_t x = 0; x < xsize; ++x) {
220
0
        out.Row(xsize - x - 1)[y] = row_in[x];
221
0
      }
222
0
      return true;
223
0
    };
224
0
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
225
0
                                  ThreadPool::NoInit, process_row,
226
0
                                  "UndoOrientation"));
227
0
  }
228
0
  return true;
229
0
}
230
}  // namespace
231
232
HWY_EXPORT(FloatToU32);
233
HWY_EXPORT(FloatToF16);
234
235
namespace {
236
237
using StoreFuncType = void(uint32_t value, uint8_t* dest);
238
template <StoreFuncType StoreFunc>
239
void StoreUintRow(uint32_t* JXL_RESTRICT* rows_u32, size_t num_channels,
240
0
                  size_t xsize, size_t bytes_per_sample, Span<uint8_t> out) {
241
  // The output row `out` is a bounds-carrying view of exactly the validated
242
  // scanline (`row_size` bytes). The highest byte written below is
243
  // `num_channels * xsize * bytes_per_sample - 1`; assert it fits so the
244
  // destination-buffer safety (guaranteed up the call stack by SafeMul/SafeAdd
245
  // on stride/out_size) is also verifiable locally at the write site.
246
0
  JXL_DASSERT(num_channels * xsize * bytes_per_sample <= out.size());
247
0
  for (size_t x = 0; x < xsize; ++x) {
248
0
    for (size_t c = 0; c < num_channels; c++) {
249
0
      StoreFunc(rows_u32[c][x],
250
0
                &out[(num_channels * x + c) * bytes_per_sample]);
251
0
    }
252
0
  }
253
0
}
Unexecuted instantiation: dec_external_image.cc:void jxl::(anonymous namespace)::StoreUintRow<&jxl::(anonymous namespace)::Store8>(unsigned int* restrict*, unsigned long, unsigned long, unsigned long, jxl::Span<unsigned char>)
Unexecuted instantiation: dec_external_image.cc:void jxl::(anonymous namespace)::StoreUintRow<&(StoreLE16(unsigned int, unsigned char*))>(unsigned int* restrict*, unsigned long, unsigned long, unsigned long, jxl::Span<unsigned char>)
Unexecuted instantiation: dec_external_image.cc:void jxl::(anonymous namespace)::StoreUintRow<&(StoreBE16(unsigned int, unsigned char*))>(unsigned int* restrict*, unsigned long, unsigned long, unsigned long, jxl::Span<unsigned char>)
254
255
template <void(StoreFunc)(float, uint8_t*)>
256
void StoreFloatRow(const float* JXL_RESTRICT* rows_in, size_t num_channels,
257
91.4k
                   size_t xsize, Span<uint8_t> out) {
258
91.4k
  JXL_DASSERT(num_channels * xsize * sizeof(float) <= out.size());
259
8.77M
  for (size_t x = 0; x < xsize; ++x) {
260
34.7M
    for (size_t c = 0; c < num_channels; c++) {
261
26.0M
      StoreFunc(rows_in[c][x], &out[(num_channels * x + c) * sizeof(float)]);
262
26.0M
    }
263
8.67M
  }
264
91.4k
}
dec_external_image.cc:void jxl::(anonymous namespace)::StoreFloatRow<&jxl::(anonymous namespace)::StoreLEFloat>(float const* restrict*, unsigned long, unsigned long, jxl::Span<unsigned char>)
Line
Count
Source
257
91.4k
                   size_t xsize, Span<uint8_t> out) {
258
91.4k
  JXL_DASSERT(num_channels * xsize * sizeof(float) <= out.size());
259
8.77M
  for (size_t x = 0; x < xsize; ++x) {
260
34.7M
    for (size_t c = 0; c < num_channels; c++) {
261
26.0M
      StoreFunc(rows_in[c][x], &out[(num_channels * x + c) * sizeof(float)]);
262
26.0M
    }
263
8.67M
  }
264
91.4k
}
Unexecuted instantiation: dec_external_image.cc:void jxl::(anonymous namespace)::StoreFloatRow<&jxl::(anonymous namespace)::StoreBEFloat>(float const* restrict*, unsigned long, unsigned long, jxl::Span<unsigned char>)
265
266
0
void JXL_INLINE Store8(uint32_t value, uint8_t* dest) { *dest = value & 0xff; }
267
268
}  // namespace
269
270
Status ConvertChannelsToExternal(const ImageF* in_channels[],
271
                                 size_t num_channels, size_t bits_per_sample,
272
                                 bool float_out, JxlEndianness endianness,
273
                                 size_t stride, jxl::ThreadPool* pool,
274
                                 void* out_image, size_t out_size,
275
                                 const PixelCallback& out_callback,
276
2.19k
                                 jxl::Orientation undo_orientation) {
277
2.19k
  JXL_ENSURE(num_channels != 0 && num_channels <= kConvertMaxChannels);
278
2.19k
  JXL_ENSURE(in_channels[0] != nullptr);
279
2.19k
  JxlMemoryManager* memory_manager = in_channels[0]->memory_manager();
280
2.19k
  JXL_ENSURE(float_out ? bits_per_sample == 16 || bits_per_sample == 32
281
2.19k
                       : bits_per_sample > 0 && bits_per_sample <= 16);
282
2.19k
  const bool has_out_image = (out_image != nullptr);
283
2.19k
  if (has_out_image == out_callback.IsPresent()) {
284
0
    return JXL_FAILURE(
285
0
        "Must provide either an out_image or an out_callback, but not both.");
286
0
  }
287
2.19k
  std::vector<const ImageF*> channels;
288
2.19k
  channels.assign(in_channels, in_channels + num_channels);
289
290
2.19k
  const size_t bytes_per_channel = DivCeil(bits_per_sample, jxl::kBitsPerByte);
291
2.19k
  const size_t bytes_per_pixel = num_channels * bytes_per_channel;
292
293
2.19k
  std::vector<std::vector<uint8_t>> row_out_callback;
294
2.19k
  const auto FreeCallbackOpaque = [&out_callback](void* p) {
295
0
    out_callback.destroy(p);
296
0
  };
297
2.19k
  std::unique_ptr<void, decltype(FreeCallbackOpaque)> out_run_opaque(
298
2.19k
      nullptr, FreeCallbackOpaque);
299
2.19k
  auto InitOutCallback = [&](size_t num_threads) -> Status {
300
2.19k
    if (out_callback.IsPresent()) {
301
0
      out_run_opaque.reset(out_callback.Init(num_threads, stride));
302
0
      JXL_RETURN_IF_ERROR(out_run_opaque != nullptr);
303
0
      row_out_callback.resize(num_threads);
304
0
      for (size_t i = 0; i < num_threads; ++i) {
305
0
        row_out_callback[i].resize(stride);
306
0
      }
307
0
    }
308
2.19k
    return true;
309
2.19k
  };
310
311
  // Channels used to store the transformed original channels if needed.
312
2.19k
  ImageF temp_channels[kConvertMaxChannels];
313
2.19k
  if (undo_orientation != Orientation::kIdentity) {
314
0
    for (size_t c = 0; c < num_channels; ++c) {
315
0
      if (channels[c]) {
316
0
        JXL_RETURN_IF_ERROR(UndoOrientation(undo_orientation, *channels[c],
317
0
                                            temp_channels[c], pool));
318
0
        channels[c] = &(temp_channels[c]);
319
0
      }
320
0
    }
321
0
  }
322
323
  // The first channel must not be nullptr (ensured on entry).
324
2.19k
  size_t xsize = channels[0]->xsize();
325
2.19k
  size_t ysize = channels[0]->ysize();
326
2.19k
  size_t row_size;
327
2.19k
  if (!SafeMul(bytes_per_pixel, xsize, row_size) || stride < row_size) {
328
0
    return JXL_FAILURE("stride is smaller than scanline width in bytes: %" PRIuS
329
0
                       " vs %" PRIuS,
330
0
                       stride, row_size);
331
0
  }
332
2.19k
  if (!out_callback.IsPresent()) {
333
2.19k
    size_t total_size;
334
2.19k
    if (!SafeMul(ysize - 1, stride, total_size) ||
335
2.19k
        !SafeAdd(total_size, row_size, total_size) || out_size < total_size) {
336
0
      return JXL_FAILURE("out_size is too small to store image");
337
0
    }
338
2.19k
  }
339
340
2.19k
  const bool little_endian =
341
2.19k
      endianness == JXL_LITTLE_ENDIAN ||
342
2.19k
      (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
343
344
  // Handle the case where a channel is nullptr by creating a single row with
345
  // ones to use instead.
346
2.19k
  ImageF ones;
347
8.78k
  for (size_t c = 0; c < num_channels; ++c) {
348
6.59k
    if (!channels[c]) {
349
0
      JXL_ASSIGN_OR_RETURN(ones, ImageF::Create(memory_manager, xsize, 1));
350
0
      FillImage(1.0f, &ones);
351
0
      break;
352
0
    }
353
6.59k
  }
354
355
2.19k
  if (float_out) {
356
2.19k
    if (bits_per_sample == 16) {
357
0
      bool swap_endianness = little_endian != IsLittleEndian();
358
0
      Plane<hwy::float16_t> f16_cache;
359
0
      const auto init_cache = [&](size_t num_threads) -> Status {
360
0
        JXL_ASSIGN_OR_RETURN(
361
0
            f16_cache, Plane<hwy::float16_t>::Create(
362
0
                           memory_manager, xsize, num_channels * num_threads));
363
0
        JXL_RETURN_IF_ERROR(InitOutCallback(num_threads));
364
0
        return true;
365
0
      };
366
0
      const auto process_row = [&](const uint32_t task,
367
0
                                   const size_t thread) -> Status {
368
0
        const int64_t y = task;
369
0
        const float* JXL_RESTRICT row_in[kConvertMaxChannels];
370
0
        for (size_t c = 0; c < num_channels; c++) {
371
0
          row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
372
0
        }
373
0
        hwy::float16_t* JXL_RESTRICT row_f16[kConvertMaxChannels];
374
0
        for (size_t c = 0; c < num_channels; c++) {
375
0
          row_f16[c] = f16_cache.Row(c + thread * num_channels);
376
0
          HWY_DYNAMIC_DISPATCH(FloatToF16)
377
0
          (row_in[c], row_f16[c], xsize);
378
0
        }
379
0
        uint8_t* row_out =
380
0
            out_callback.IsPresent()
381
0
                ? row_out_callback[thread].data()
382
0
                : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
383
        // Bounds-carrying view of exactly the validated scanline.
384
0
        Span<uint8_t> out_span(row_out, row_size);
385
        // Interleave the channels of this scanline.
386
0
        Span<hwy::float16_t> row_f16_out(
387
0
            reinterpret_cast<hwy::float16_t*>(row_out),
388
0
            row_size / sizeof(hwy::float16_t));
389
0
        JXL_DASSERT(xsize * num_channels <= row_f16_out.size());
390
0
        for (size_t x = 0; x < xsize; x++) {
391
0
          for (size_t c = 0; c < num_channels; c++) {
392
0
            row_f16_out[x * num_channels + c] = row_f16[c][x];
393
0
          }
394
0
        }
395
0
        if (swap_endianness) {
396
0
          size_t size = xsize * num_channels * 2;
397
0
          for (size_t i = 0; i < size; i += 2) {
398
0
            std::swap(out_span[i + 0], out_span[i + 1]);
399
0
          }
400
0
        }
401
0
        if (out_callback.IsPresent()) {
402
0
          out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out);
403
0
        }
404
0
        return true;
405
0
      };
406
0
      JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
407
0
                                    init_cache, process_row, "ConvertF16"));
408
2.19k
    } else if (bits_per_sample == 32) {
409
2.19k
      const auto init_cache = [&](size_t num_threads) -> Status {
410
2.19k
        JXL_RETURN_IF_ERROR(InitOutCallback(num_threads));
411
2.19k
        return true;
412
2.19k
      };
413
2.19k
      const auto process_row = [&](const uint32_t task,
414
91.4k
                                   const size_t thread) -> Status {
415
91.4k
        const int64_t y = task;
416
91.4k
        uint8_t* row_out =
417
91.4k
            out_callback.IsPresent()
418
91.4k
                ? row_out_callback[thread].data()
419
91.4k
                : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
420
91.4k
        Span<uint8_t> out_span(row_out, row_size);
421
91.4k
        const float* JXL_RESTRICT row_in[kConvertMaxChannels];
422
365k
        for (size_t c = 0; c < num_channels; c++) {
423
274k
          row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
424
274k
        }
425
91.4k
        if (little_endian) {
426
91.4k
          StoreFloatRow<StoreLEFloat>(row_in, num_channels, xsize, out_span);
427
91.4k
        } else {
428
0
          StoreFloatRow<StoreBEFloat>(row_in, num_channels, xsize, out_span);
429
0
        }
430
91.4k
        if (out_callback.IsPresent()) {
431
0
          out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out);
432
0
        }
433
91.4k
        return true;
434
91.4k
      };
435
2.19k
      JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
436
2.19k
                                    init_cache, process_row, "ConvertFloat"));
437
2.19k
    } else {
438
0
      return JXL_FAILURE("float other than 16-bit and 32-bit not supported");
439
0
    }
440
2.19k
  } else {
441
    // Multiplier to convert from floating point 0-1 range to the integer
442
    // range.
443
0
    float mul = (1ull << bits_per_sample) - 1;
444
0
    Plane<uint32_t> u32_cache;
445
0
    const auto init_cache = [&](size_t num_threads) -> Status {
446
0
      JXL_ASSIGN_OR_RETURN(u32_cache,
447
0
                           Plane<uint32_t>::Create(memory_manager, xsize,
448
0
                                                   num_channels * num_threads));
449
0
      JXL_RETURN_IF_ERROR(InitOutCallback(num_threads));
450
0
      return true;
451
0
    };
452
0
    const auto process_row = [&](const uint32_t task,
453
0
                                 const size_t thread) -> Status {
454
0
      const int64_t y = task;
455
0
      uint8_t* row_out =
456
0
          out_callback.IsPresent()
457
0
              ? row_out_callback[thread].data()
458
0
              : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
459
0
      Span<uint8_t> out_span(row_out, row_size);
460
0
      const float* JXL_RESTRICT row_in[kConvertMaxChannels];
461
0
      for (size_t c = 0; c < num_channels; c++) {
462
0
        row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
463
0
      }
464
0
      uint32_t* JXL_RESTRICT row_u32[kConvertMaxChannels];
465
0
      for (size_t c = 0; c < num_channels; c++) {
466
0
        row_u32[c] = u32_cache.Row(c + thread * num_channels);
467
        // row_u32[] is per-thread temporary row storage; it is not meant to
468
        // carry initialized data over from a previous run.
469
0
        msan::PoisonMemory(row_u32[c], xsize * sizeof(row_u32[c][0]));
470
0
        HWY_DYNAMIC_DISPATCH(FloatToU32)
471
0
        (row_in[c], row_u32[c], xsize, mul, bits_per_sample);
472
0
      }
473
0
      if (bits_per_sample <= 8) {
474
0
        StoreUintRow<Store8>(row_u32, num_channels, xsize, 1, out_span);
475
0
      } else {
476
0
        if (little_endian) {
477
0
          StoreUintRow<StoreLE16>(row_u32, num_channels, xsize, 2, out_span);
478
0
        } else {
479
0
          StoreUintRow<StoreBE16>(row_u32, num_channels, xsize, 2, out_span);
480
0
        }
481
0
      }
482
0
      if (out_callback.IsPresent()) {
483
0
        out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out);
484
0
      }
485
0
      return true;
486
0
    };
487
0
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
488
0
                                  init_cache, process_row, "ConvertUint"));
489
0
  }
490
2.19k
  return true;
491
2.19k
}
492
493
Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample,
494
                         bool float_out, size_t num_channels,
495
                         JxlEndianness endianness, size_t stride,
496
                         jxl::ThreadPool* pool, void* out_image,
497
                         size_t out_size, const PixelCallback& out_callback,
498
                         jxl::Orientation undo_orientation,
499
2.19k
                         bool unpremul_alpha) {
500
2.19k
  bool want_alpha = num_channels == 2 || num_channels == 4;
501
2.19k
  size_t color_channels = num_channels <= 2 ? 1 : 3;
502
503
2.19k
  const Image3F* color = &ib.color();
504
2.19k
  JxlMemoryManager* memory_manager = color->memory_manager();
505
  // Undo premultiplied alpha.
506
2.19k
  Image3F unpremul;
507
2.19k
  if (ib.AlphaIsPremultiplied() && ib.HasAlpha() && unpremul_alpha) {
508
0
    JXL_ASSIGN_OR_RETURN(
509
0
        unpremul,
510
0
        Image3F::Create(memory_manager, color->xsize(), color->ysize()));
511
0
    JXL_RETURN_IF_ERROR(CopyImageTo(*color, &unpremul));
512
0
    const ImageF* alpha = ib.alpha();
513
0
    for (size_t y = 0; y < unpremul.ysize(); y++) {
514
0
      UnpremultiplyAlpha(unpremul.PlaneRow(0, y), unpremul.PlaneRow(1, y),
515
0
                         unpremul.PlaneRow(2, y), alpha->Row(y),
516
0
                         unpremul.xsize());
517
0
    }
518
0
    color = &unpremul;
519
0
  }
520
521
2.19k
  const ImageF* channels[kConvertMaxChannels];
522
2.19k
  size_t c = 0;
523
8.78k
  for (; c < color_channels; c++) {
524
6.59k
    channels[c] = &color->Plane(c);
525
6.59k
  }
526
2.19k
  if (want_alpha) {
527
0
    channels[c++] = ib.alpha();
528
0
  }
529
2.19k
  JXL_ENSURE(num_channels == c);
530
531
2.19k
  return ConvertChannelsToExternal(
532
2.19k
      channels, num_channels, bits_per_sample, float_out, endianness, stride,
533
2.19k
      pool, out_image, out_size, out_callback, undo_orientation);
534
2.19k
}
535
536
}  // namespace jxl
537
#endif  // HWY_ONCE