/src/libjxl/lib/jxl/dec_external_image.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/dec_external_image.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | #include <jxl/types.h> |
10 | | |
11 | | #include <algorithm> |
12 | | #include <cstring> |
13 | | #include <utility> |
14 | | #include <vector> |
15 | | |
16 | | #include "lib/jxl/base/status.h" |
17 | | #include "lib/jxl/image.h" |
18 | | |
19 | | #undef HWY_TARGET_INCLUDE |
20 | | #define HWY_TARGET_INCLUDE "lib/jxl/dec_external_image.cc" |
21 | | #include <hwy/foreach_target.h> |
22 | | #include <hwy/highway.h> |
23 | | |
24 | | #include "lib/jxl/alpha.h" |
25 | | #include "lib/jxl/base/byte_order.h" |
26 | | #include "lib/jxl/base/common.h" |
27 | | #include "lib/jxl/base/compiler_specific.h" |
28 | | #include "lib/jxl/base/printf_macros.h" |
29 | | #include "lib/jxl/base/sanitizers.h" |
30 | | |
31 | | HWY_BEFORE_NAMESPACE(); |
32 | | namespace jxl { |
33 | | namespace HWY_NAMESPACE { |
34 | | |
35 | | // These templates are not found via ADL. |
36 | | using hwy::HWY_NAMESPACE::Clamp; |
37 | | using hwy::HWY_NAMESPACE::Mul; |
38 | | using hwy::HWY_NAMESPACE::NearestInt; |
39 | | |
40 | | // TODO(jon): check if this can be replaced by a FloatToU16 function |
41 | | void FloatToU32(const float* in, uint32_t* out, size_t num, float mul, |
42 | 0 | size_t bits_per_sample) { |
43 | 0 | const HWY_FULL(float) d; |
44 | 0 | const hwy::HWY_NAMESPACE::Rebind<uint32_t, decltype(d)> du; |
45 | | |
46 | | // Unpoison accessing partially-uninitialized vectors with memory sanitizer. |
47 | | // This is because we run NearestInt() on the vector, which triggers MSAN even |
48 | | // it is safe to do so since the values are not mixed between lanes. |
49 | 0 | const size_t num_round_up = RoundUpTo(num, Lanes(d)); |
50 | 0 | msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num)); |
51 | |
|
52 | 0 | const auto one = Set(d, 1.0f); |
53 | 0 | const auto scale = Set(d, mul); |
54 | 0 | for (size_t x = 0; x < num; x += Lanes(d)) { |
55 | 0 | auto v = Load(d, in + x); |
56 | | // Clamp turns NaN to 'min'. |
57 | 0 | v = Clamp(v, Zero(d), one); |
58 | 0 | auto i = NearestInt(Mul(v, scale)); |
59 | 0 | Store(BitCast(du, i), du, out + x); |
60 | 0 | } |
61 | | |
62 | | // Poison back the output. |
63 | 0 | msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num)); |
64 | 0 | } |
65 | | |
66 | 0 | void FloatToF16(const float* in, hwy::float16_t* out, size_t num) { |
67 | 0 | const HWY_FULL(float) d; |
68 | 0 | const hwy::HWY_NAMESPACE::Rebind<hwy::float16_t, decltype(d)> du; |
69 | | |
70 | | // Unpoison accessing partially-uninitialized vectors with memory sanitizer. |
71 | | // This is because we run DemoteTo() on the vector which triggers msan. |
72 | 0 | const size_t num_round_up = RoundUpTo(num, Lanes(d)); |
73 | 0 | msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num)); |
74 | |
|
75 | 0 | for (size_t x = 0; x < num; x += Lanes(d)) { |
76 | 0 | auto v = Load(d, in + x); |
77 | 0 | auto v16 = DemoteTo(du, v); |
78 | 0 | Store(v16, du, out + x); |
79 | 0 | } |
80 | | |
81 | | // Poison back the output. |
82 | 0 | msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num)); |
83 | 0 | } |
84 | | |
85 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
86 | | } // namespace HWY_NAMESPACE |
87 | | } // namespace jxl |
88 | | HWY_AFTER_NAMESPACE(); |
89 | | |
90 | | #if HWY_ONCE |
91 | | |
92 | | namespace jxl { |
93 | | namespace { |
94 | | |
95 | | // Stores a float in big endian |
96 | 0 | void StoreBEFloat(float value, uint8_t* p) { |
97 | 0 | uint32_t u; |
98 | 0 | memcpy(&u, &value, 4); |
99 | 0 | StoreBE32(u, p); |
100 | 0 | } |
101 | | |
102 | | // Stores a float in little endian |
103 | 0 | void StoreLEFloat(float value, uint8_t* p) { |
104 | 0 | uint32_t u; |
105 | 0 | memcpy(&u, &value, 4); |
106 | 0 | StoreLE32(u, p); |
107 | 0 | } |
108 | | |
109 | | // The orientation may not be identity. |
110 | | // TODO(lode): SIMDify where possible |
111 | | template <typename T> |
112 | | Status UndoOrientation(jxl::Orientation undo_orientation, const Plane<T>& image, |
113 | 0 | Plane<T>& out, jxl::ThreadPool* pool) { |
114 | 0 | const size_t xsize = image.xsize(); |
115 | 0 | const size_t ysize = image.ysize(); |
116 | 0 | JxlMemoryManager* memory_manager = image.memory_manager(); |
117 | |
|
118 | 0 | if (undo_orientation == Orientation::kFlipHorizontal) { |
119 | 0 | JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, xsize, ysize)); |
120 | 0 | const auto process_row = [&](const uint32_t task, |
121 | 0 | size_t /*thread*/) -> Status { |
122 | 0 | const int64_t y = task; |
123 | 0 | const T* JXL_RESTRICT row_in = image.Row(y); |
124 | 0 | T* JXL_RESTRICT row_out = out.Row(y); |
125 | 0 | for (size_t x = 0; x < xsize; ++x) { |
126 | 0 | row_out[xsize - x - 1] = row_in[x]; |
127 | 0 | } |
128 | 0 | return true; |
129 | 0 | }; |
130 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
131 | 0 | ThreadPool::NoInit, process_row, |
132 | 0 | "UndoOrientation")); |
133 | 0 | } else if (undo_orientation == Orientation::kRotate180) { |
134 | 0 | JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, xsize, ysize)); |
135 | 0 | const auto process_row = [&](const uint32_t task, |
136 | 0 | size_t /*thread*/) -> Status { |
137 | 0 | const int64_t y = task; |
138 | 0 | const T* JXL_RESTRICT row_in = image.Row(y); |
139 | 0 | T* JXL_RESTRICT row_out = out.Row(ysize - y - 1); |
140 | 0 | for (size_t x = 0; x < xsize; ++x) { |
141 | 0 | row_out[xsize - x - 1] = row_in[x]; |
142 | 0 | } |
143 | 0 | return true; |
144 | 0 | }; |
145 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
146 | 0 | ThreadPool::NoInit, process_row, |
147 | 0 | "UndoOrientation")); |
148 | 0 | } else if (undo_orientation == Orientation::kFlipVertical) { |
149 | 0 | JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, xsize, ysize)); |
150 | 0 | const auto process_row = [&](const uint32_t task, |
151 | 0 | size_t /*thread*/) -> Status { |
152 | 0 | const int64_t y = task; |
153 | 0 | const T* JXL_RESTRICT row_in = image.Row(y); |
154 | 0 | T* JXL_RESTRICT row_out = out.Row(ysize - y - 1); |
155 | 0 | for (size_t x = 0; x < xsize; ++x) { |
156 | 0 | row_out[x] = row_in[x]; |
157 | 0 | } |
158 | 0 | return true; |
159 | 0 | }; |
160 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
161 | 0 | ThreadPool::NoInit, process_row, |
162 | 0 | "UndoOrientation")); |
163 | 0 | } else if (undo_orientation == Orientation::kTranspose) { |
164 | 0 | JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize)); |
165 | 0 | const auto process_row = [&](const uint32_t task, |
166 | 0 | size_t /*thread*/) -> Status { |
167 | 0 | const int64_t y = task; |
168 | 0 | const T* JXL_RESTRICT row_in = image.Row(y); |
169 | 0 | for (size_t x = 0; x < xsize; ++x) { |
170 | 0 | out.Row(x)[y] = row_in[x]; |
171 | 0 | } |
172 | 0 | return true; |
173 | 0 | }; |
174 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
175 | 0 | ThreadPool::NoInit, process_row, |
176 | 0 | "UndoOrientation")); |
177 | 0 | } else if (undo_orientation == Orientation::kRotate90) { |
178 | 0 | JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize)); |
179 | 0 | const auto process_row = [&](const uint32_t task, |
180 | 0 | size_t /*thread*/) -> Status { |
181 | 0 | const int64_t y = task; |
182 | 0 | const T* JXL_RESTRICT row_in = image.Row(y); |
183 | 0 | for (size_t x = 0; x < xsize; ++x) { |
184 | 0 | out.Row(x)[ysize - y - 1] = row_in[x]; |
185 | 0 | } |
186 | 0 | return true; |
187 | 0 | }; |
188 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
189 | 0 | ThreadPool::NoInit, process_row, |
190 | 0 | "UndoOrientation")); |
191 | 0 | } else if (undo_orientation == Orientation::kAntiTranspose) { |
192 | 0 | JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize)); |
193 | 0 | const auto process_row = [&](const uint32_t task, |
194 | 0 | size_t /*thread*/) -> Status { |
195 | 0 | const int64_t y = task; |
196 | 0 | const T* JXL_RESTRICT row_in = image.Row(y); |
197 | 0 | for (size_t x = 0; x < xsize; ++x) { |
198 | 0 | out.Row(xsize - x - 1)[ysize - y - 1] = row_in[x]; |
199 | 0 | } |
200 | 0 | return true; |
201 | 0 | }; |
202 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
203 | 0 | ThreadPool::NoInit, process_row, |
204 | 0 | "UndoOrientation")); |
205 | 0 | } else if (undo_orientation == Orientation::kRotate270) { |
206 | 0 | JXL_ASSIGN_OR_RETURN(out, Plane<T>::Create(memory_manager, ysize, xsize)); |
207 | 0 | const auto process_row = [&](const uint32_t task, |
208 | 0 | size_t /*thread*/) -> Status { |
209 | 0 | const int64_t y = task; |
210 | 0 | const T* JXL_RESTRICT row_in = image.Row(y); |
211 | 0 | for (size_t x = 0; x < xsize; ++x) { |
212 | 0 | out.Row(xsize - x - 1)[y] = row_in[x]; |
213 | 0 | } |
214 | 0 | return true; |
215 | 0 | }; |
216 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
217 | 0 | ThreadPool::NoInit, process_row, |
218 | 0 | "UndoOrientation")); |
219 | 0 | } |
220 | 0 | return true; |
221 | 0 | } |
222 | | } // namespace |
223 | | |
224 | | HWY_EXPORT(FloatToU32); |
225 | | HWY_EXPORT(FloatToF16); |
226 | | |
227 | | namespace { |
228 | | |
229 | | using StoreFuncType = void(uint32_t value, uint8_t* dest); |
230 | | template <StoreFuncType StoreFunc> |
231 | | void StoreUintRow(uint32_t* JXL_RESTRICT* rows_u32, size_t num_channels, |
232 | | size_t xsize, size_t bytes_per_sample, |
233 | 0 | uint8_t* JXL_RESTRICT out) { |
234 | 0 | for (size_t x = 0; x < xsize; ++x) { |
235 | 0 | for (size_t c = 0; c < num_channels; c++) { |
236 | 0 | StoreFunc(rows_u32[c][x], |
237 | 0 | out + (num_channels * x + c) * bytes_per_sample); |
238 | 0 | } |
239 | 0 | } |
240 | 0 | } Unexecuted instantiation: dec_external_image.cc:void jxl::(anonymous namespace)::StoreUintRow<&jxl::(anonymous namespace)::Store8>(unsigned int* restrict*, unsigned long, unsigned long, unsigned long, unsigned char*) Unexecuted instantiation: dec_external_image.cc:void jxl::(anonymous namespace)::StoreUintRow<&(StoreLE16(unsigned int, unsigned char*))>(unsigned int* restrict*, unsigned long, unsigned long, unsigned long, unsigned char*) Unexecuted instantiation: dec_external_image.cc:void jxl::(anonymous namespace)::StoreUintRow<&(StoreBE16(unsigned int, unsigned char*))>(unsigned int* restrict*, unsigned long, unsigned long, unsigned long, unsigned char*) |
241 | | |
242 | | template <void(StoreFunc)(float, uint8_t*)> |
243 | | void StoreFloatRow(const float* JXL_RESTRICT* rows_in, size_t num_channels, |
244 | 0 | size_t xsize, uint8_t* JXL_RESTRICT out) { |
245 | 0 | for (size_t x = 0; x < xsize; ++x) { |
246 | 0 | for (size_t c = 0; c < num_channels; c++) { |
247 | 0 | StoreFunc(rows_in[c][x], out + (num_channels * x + c) * sizeof(float)); |
248 | 0 | } |
249 | 0 | } |
250 | 0 | } Unexecuted instantiation: dec_external_image.cc:void jxl::(anonymous namespace)::StoreFloatRow<&jxl::(anonymous namespace)::StoreLEFloat>(float const* restrict*, unsigned long, unsigned long, unsigned char*) Unexecuted instantiation: dec_external_image.cc:void jxl::(anonymous namespace)::StoreFloatRow<&jxl::(anonymous namespace)::StoreBEFloat>(float const* restrict*, unsigned long, unsigned long, unsigned char*) |
251 | | |
252 | 0 | void JXL_INLINE Store8(uint32_t value, uint8_t* dest) { *dest = value & 0xff; } |
253 | | |
254 | | } // namespace |
255 | | |
256 | | Status ConvertChannelsToExternal(const ImageF* in_channels[], |
257 | | size_t num_channels, size_t bits_per_sample, |
258 | | bool float_out, JxlEndianness endianness, |
259 | | size_t stride, jxl::ThreadPool* pool, |
260 | | void* out_image, size_t out_size, |
261 | | const PixelCallback& out_callback, |
262 | 0 | jxl::Orientation undo_orientation) { |
263 | 0 | JXL_ENSURE(num_channels != 0 && num_channels <= kConvertMaxChannels); |
264 | 0 | JXL_ENSURE(in_channels[0] != nullptr); |
265 | 0 | JxlMemoryManager* memory_manager = in_channels[0]->memory_manager(); |
266 | 0 | JXL_ENSURE(float_out ? bits_per_sample == 16 || bits_per_sample == 32 |
267 | 0 | : bits_per_sample > 0 && bits_per_sample <= 16); |
268 | 0 | const bool has_out_image = (out_image != nullptr); |
269 | 0 | if (has_out_image == out_callback.IsPresent()) { |
270 | 0 | return JXL_FAILURE( |
271 | 0 | "Must provide either an out_image or an out_callback, but not both."); |
272 | 0 | } |
273 | 0 | std::vector<const ImageF*> channels; |
274 | 0 | channels.assign(in_channels, in_channels + num_channels); |
275 | |
|
276 | 0 | const size_t bytes_per_channel = DivCeil(bits_per_sample, jxl::kBitsPerByte); |
277 | 0 | const size_t bytes_per_pixel = num_channels * bytes_per_channel; |
278 | |
|
279 | 0 | std::vector<std::vector<uint8_t>> row_out_callback; |
280 | 0 | const auto FreeCallbackOpaque = [&out_callback](void* p) { |
281 | 0 | out_callback.destroy(p); |
282 | 0 | }; |
283 | 0 | std::unique_ptr<void, decltype(FreeCallbackOpaque)> out_run_opaque( |
284 | 0 | nullptr, FreeCallbackOpaque); |
285 | 0 | auto InitOutCallback = [&](size_t num_threads) -> Status { |
286 | 0 | if (out_callback.IsPresent()) { |
287 | 0 | out_run_opaque.reset(out_callback.Init(num_threads, stride)); |
288 | 0 | JXL_RETURN_IF_ERROR(out_run_opaque != nullptr); |
289 | 0 | row_out_callback.resize(num_threads); |
290 | 0 | for (size_t i = 0; i < num_threads; ++i) { |
291 | 0 | row_out_callback[i].resize(stride); |
292 | 0 | } |
293 | 0 | } |
294 | 0 | return true; |
295 | 0 | }; |
296 | | |
297 | | // Channels used to store the transformed original channels if needed. |
298 | 0 | ImageF temp_channels[kConvertMaxChannels]; |
299 | 0 | if (undo_orientation != Orientation::kIdentity) { |
300 | 0 | for (size_t c = 0; c < num_channels; ++c) { |
301 | 0 | if (channels[c]) { |
302 | 0 | JXL_RETURN_IF_ERROR(UndoOrientation(undo_orientation, *channels[c], |
303 | 0 | temp_channels[c], pool)); |
304 | 0 | channels[c] = &(temp_channels[c]); |
305 | 0 | } |
306 | 0 | } |
307 | 0 | } |
308 | | |
309 | | // First channel may not be nullptr. |
310 | 0 | size_t xsize = channels[0]->xsize(); |
311 | 0 | size_t ysize = channels[0]->ysize(); |
312 | 0 | if (stride < bytes_per_pixel * xsize) { |
313 | 0 | return JXL_FAILURE("stride is smaller than scanline width in bytes: %" PRIuS |
314 | 0 | " vs %" PRIuS, |
315 | 0 | stride, bytes_per_pixel * xsize); |
316 | 0 | } |
317 | 0 | if (!out_callback.IsPresent() && |
318 | 0 | out_size < (ysize - 1) * stride + bytes_per_pixel * xsize) { |
319 | 0 | return JXL_FAILURE("out_size is too small to store image"); |
320 | 0 | } |
321 | | |
322 | 0 | const bool little_endian = |
323 | 0 | endianness == JXL_LITTLE_ENDIAN || |
324 | 0 | (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian()); |
325 | | |
326 | | // Handle the case where a channel is nullptr by creating a single row with |
327 | | // ones to use instead. |
328 | 0 | ImageF ones; |
329 | 0 | for (size_t c = 0; c < num_channels; ++c) { |
330 | 0 | if (!channels[c]) { |
331 | 0 | JXL_ASSIGN_OR_RETURN(ones, ImageF::Create(memory_manager, xsize, 1)); |
332 | 0 | FillImage(1.0f, &ones); |
333 | 0 | break; |
334 | 0 | } |
335 | 0 | } |
336 | | |
337 | 0 | if (float_out) { |
338 | 0 | if (bits_per_sample == 16) { |
339 | 0 | bool swap_endianness = little_endian != IsLittleEndian(); |
340 | 0 | Plane<hwy::float16_t> f16_cache; |
341 | 0 | const auto init_cache = [&](size_t num_threads) -> Status { |
342 | 0 | JXL_ASSIGN_OR_RETURN( |
343 | 0 | f16_cache, Plane<hwy::float16_t>::Create( |
344 | 0 | memory_manager, xsize, num_channels * num_threads)); |
345 | 0 | JXL_RETURN_IF_ERROR(InitOutCallback(num_threads)); |
346 | 0 | return true; |
347 | 0 | }; |
348 | 0 | const auto process_row = [&](const uint32_t task, |
349 | 0 | const size_t thread) -> Status { |
350 | 0 | const int64_t y = task; |
351 | 0 | const float* JXL_RESTRICT row_in[kConvertMaxChannels]; |
352 | 0 | for (size_t c = 0; c < num_channels; c++) { |
353 | 0 | row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0); |
354 | 0 | } |
355 | 0 | hwy::float16_t* JXL_RESTRICT row_f16[kConvertMaxChannels]; |
356 | 0 | for (size_t c = 0; c < num_channels; c++) { |
357 | 0 | row_f16[c] = f16_cache.Row(c + thread * num_channels); |
358 | 0 | HWY_DYNAMIC_DISPATCH(FloatToF16) |
359 | 0 | (row_in[c], row_f16[c], xsize); |
360 | 0 | } |
361 | 0 | uint8_t* row_out = |
362 | 0 | out_callback.IsPresent() |
363 | 0 | ? row_out_callback[thread].data() |
364 | 0 | : &(reinterpret_cast<uint8_t*>(out_image))[stride * y]; |
365 | | // interleave the one scanline |
366 | 0 | hwy::float16_t* row_f16_out = |
367 | 0 | reinterpret_cast<hwy::float16_t*>(row_out); |
368 | 0 | for (size_t x = 0; x < xsize; x++) { |
369 | 0 | for (size_t c = 0; c < num_channels; c++) { |
370 | 0 | row_f16_out[x * num_channels + c] = row_f16[c][x]; |
371 | 0 | } |
372 | 0 | } |
373 | 0 | if (swap_endianness) { |
374 | 0 | size_t size = xsize * num_channels * 2; |
375 | 0 | for (size_t i = 0; i < size; i += 2) { |
376 | 0 | std::swap(row_out[i + 0], row_out[i + 1]); |
377 | 0 | } |
378 | 0 | } |
379 | 0 | if (out_callback.IsPresent()) { |
380 | 0 | out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out); |
381 | 0 | } |
382 | 0 | return true; |
383 | 0 | }; |
384 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
385 | 0 | init_cache, process_row, "ConvertF16")); |
386 | 0 | } else if (bits_per_sample == 32) { |
387 | 0 | const auto init_cache = [&](size_t num_threads) -> Status { |
388 | 0 | JXL_RETURN_IF_ERROR(InitOutCallback(num_threads)); |
389 | 0 | return true; |
390 | 0 | }; |
391 | 0 | const auto process_row = [&](const uint32_t task, |
392 | 0 | const size_t thread) -> Status { |
393 | 0 | const int64_t y = task; |
394 | 0 | uint8_t* row_out = |
395 | 0 | out_callback.IsPresent() |
396 | 0 | ? row_out_callback[thread].data() |
397 | 0 | : &(reinterpret_cast<uint8_t*>(out_image))[stride * y]; |
398 | 0 | const float* JXL_RESTRICT row_in[kConvertMaxChannels]; |
399 | 0 | for (size_t c = 0; c < num_channels; c++) { |
400 | 0 | row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0); |
401 | 0 | } |
402 | 0 | if (little_endian) { |
403 | 0 | StoreFloatRow<StoreLEFloat>(row_in, num_channels, xsize, row_out); |
404 | 0 | } else { |
405 | 0 | StoreFloatRow<StoreBEFloat>(row_in, num_channels, xsize, row_out); |
406 | 0 | } |
407 | 0 | if (out_callback.IsPresent()) { |
408 | 0 | out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out); |
409 | 0 | } |
410 | 0 | return true; |
411 | 0 | }; |
412 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
413 | 0 | init_cache, process_row, "ConvertFloat")); |
414 | 0 | } else { |
415 | 0 | return JXL_FAILURE("float other than 16-bit and 32-bit not supported"); |
416 | 0 | } |
417 | 0 | } else { |
418 | | // Multiplier to convert from floating point 0-1 range to the integer |
419 | | // range. |
420 | 0 | float mul = (1ull << bits_per_sample) - 1; |
421 | 0 | Plane<uint32_t> u32_cache; |
422 | 0 | const auto init_cache = [&](size_t num_threads) -> Status { |
423 | 0 | JXL_ASSIGN_OR_RETURN(u32_cache, |
424 | 0 | Plane<uint32_t>::Create(memory_manager, xsize, |
425 | 0 | num_channels * num_threads)); |
426 | 0 | JXL_RETURN_IF_ERROR(InitOutCallback(num_threads)); |
427 | 0 | return true; |
428 | 0 | }; |
429 | 0 | const auto process_row = [&](const uint32_t task, |
430 | 0 | const size_t thread) -> Status { |
431 | 0 | const int64_t y = task; |
432 | 0 | uint8_t* row_out = |
433 | 0 | out_callback.IsPresent() |
434 | 0 | ? row_out_callback[thread].data() |
435 | 0 | : &(reinterpret_cast<uint8_t*>(out_image))[stride * y]; |
436 | 0 | const float* JXL_RESTRICT row_in[kConvertMaxChannels]; |
437 | 0 | for (size_t c = 0; c < num_channels; c++) { |
438 | 0 | row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0); |
439 | 0 | } |
440 | 0 | uint32_t* JXL_RESTRICT row_u32[kConvertMaxChannels]; |
441 | 0 | for (size_t c = 0; c < num_channels; c++) { |
442 | 0 | row_u32[c] = u32_cache.Row(c + thread * num_channels); |
443 | | // row_u32[] is a per-thread temporary row storage, this isn't |
444 | | // intended to be initialized on a previous run. |
445 | 0 | msan::PoisonMemory(row_u32[c], xsize * sizeof(row_u32[c][0])); |
446 | 0 | HWY_DYNAMIC_DISPATCH(FloatToU32) |
447 | 0 | (row_in[c], row_u32[c], xsize, mul, bits_per_sample); |
448 | 0 | } |
449 | 0 | if (bits_per_sample <= 8) { |
450 | 0 | StoreUintRow<Store8>(row_u32, num_channels, xsize, 1, row_out); |
451 | 0 | } else { |
452 | 0 | if (little_endian) { |
453 | 0 | StoreUintRow<StoreLE16>(row_u32, num_channels, xsize, 2, row_out); |
454 | 0 | } else { |
455 | 0 | StoreUintRow<StoreBE16>(row_u32, num_channels, xsize, 2, row_out); |
456 | 0 | } |
457 | 0 | } |
458 | 0 | if (out_callback.IsPresent()) { |
459 | 0 | out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, row_out); |
460 | 0 | } |
461 | 0 | return true; |
462 | 0 | }; |
463 | 0 | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), |
464 | 0 | init_cache, process_row, "ConvertUint")); |
465 | 0 | } |
466 | 0 | return true; |
467 | 0 | } |
468 | | |
469 | | Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample, |
470 | | bool float_out, size_t num_channels, |
471 | | JxlEndianness endianness, size_t stride, |
472 | | jxl::ThreadPool* pool, void* out_image, |
473 | | size_t out_size, const PixelCallback& out_callback, |
474 | | jxl::Orientation undo_orientation, |
475 | 0 | bool unpremul_alpha) { |
476 | 0 | bool want_alpha = num_channels == 2 || num_channels == 4; |
477 | 0 | size_t color_channels = num_channels <= 2 ? 1 : 3; |
478 | |
|
479 | 0 | const Image3F* color = &ib.color(); |
480 | 0 | JxlMemoryManager* memory_manager = color->memory_manager(); |
481 | | // Undo premultiplied alpha. |
482 | 0 | Image3F unpremul; |
483 | 0 | if (ib.AlphaIsPremultiplied() && ib.HasAlpha() && unpremul_alpha) { |
484 | 0 | JXL_ASSIGN_OR_RETURN( |
485 | 0 | unpremul, |
486 | 0 | Image3F::Create(memory_manager, color->xsize(), color->ysize())); |
487 | 0 | JXL_RETURN_IF_ERROR(CopyImageTo(*color, &unpremul)); |
488 | 0 | const ImageF* alpha = ib.alpha(); |
489 | 0 | for (size_t y = 0; y < unpremul.ysize(); y++) { |
490 | 0 | UnpremultiplyAlpha(unpremul.PlaneRow(0, y), unpremul.PlaneRow(1, y), |
491 | 0 | unpremul.PlaneRow(2, y), alpha->Row(y), |
492 | 0 | unpremul.xsize()); |
493 | 0 | } |
494 | 0 | color = &unpremul; |
495 | 0 | } |
496 | | |
497 | 0 | const ImageF* channels[kConvertMaxChannels]; |
498 | 0 | size_t c = 0; |
499 | 0 | for (; c < color_channels; c++) { |
500 | 0 | channels[c] = &color->Plane(c); |
501 | 0 | } |
502 | 0 | if (want_alpha) { |
503 | 0 | channels[c++] = ib.alpha(); |
504 | 0 | } |
505 | 0 | JXL_ENSURE(num_channels == c); |
506 | | |
507 | 0 | return ConvertChannelsToExternal( |
508 | 0 | channels, num_channels, bits_per_sample, float_out, endianness, stride, |
509 | 0 | pool, out_image, out_size, out_callback, undo_orientation); |
510 | 0 | } |
511 | | |
512 | | } // namespace jxl |
513 | | #endif // HWY_ONCE |