/src/libjxl/lib/jxl/dec_modular.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/dec_modular.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | |
10 | | #include <algorithm> |
11 | | #include <cstddef> |
12 | | #include <cstdint> |
13 | | #include <cstring> |
14 | | #include <utility> |
15 | | #include <vector> |
16 | | |
17 | | #include "lib/jxl/ac_strategy.h" |
18 | | #include "lib/jxl/base/bits.h" |
19 | | #include "lib/jxl/base/common.h" |
20 | | #include "lib/jxl/base/data_parallel.h" |
21 | | #include "lib/jxl/chroma_from_luma.h" |
22 | | #include "lib/jxl/dec_ans.h" |
23 | | #include "lib/jxl/dec_cache.h" |
24 | | #include "lib/jxl/fields.h" |
25 | | #include "lib/jxl/frame_dimensions.h" |
26 | | #include "lib/jxl/frame_header.h" |
27 | | #include "lib/jxl/image.h" |
28 | | #include "lib/jxl/image_metadata.h" |
29 | | #include "lib/jxl/image_ops.h" |
30 | | #include "lib/jxl/loop_filter.h" |
31 | | #include "lib/jxl/modular/encoding/dec_ma.h" |
32 | | #include "lib/jxl/modular/options.h" |
33 | | #include "lib/jxl/quant_weights.h" |
34 | | #include "lib/jxl/quantizer.h" |
35 | | #include "lib/jxl/render_pipeline/render_pipeline.h" |
36 | | |
37 | | #undef HWY_TARGET_INCLUDE |
38 | | #define HWY_TARGET_INCLUDE "lib/jxl/dec_modular.cc" |
39 | | #include <hwy/foreach_target.h> |
40 | | #include <hwy/highway.h> |
41 | | |
42 | | #include "lib/jxl/base/compiler_specific.h" |
43 | | #include "lib/jxl/base/printf_macros.h" |
44 | | #include "lib/jxl/base/rect.h" |
45 | | #include "lib/jxl/base/status.h" |
46 | | #include "lib/jxl/compressed_dc.h" |
47 | | #include "lib/jxl/epf.h" |
48 | | #include "lib/jxl/modular/encoding/encoding.h" |
49 | | #include "lib/jxl/modular/modular_image.h" |
50 | | #include "lib/jxl/modular/transform/transform.h" |
51 | | |
52 | | HWY_BEFORE_NAMESPACE(); |
53 | | namespace jxl { |
54 | | namespace HWY_NAMESPACE { |
55 | | |
56 | | // These templates are not found via ADL. |
57 | | using hwy::HWY_NAMESPACE::Add; |
58 | | using hwy::HWY_NAMESPACE::Mul; |
59 | | using hwy::HWY_NAMESPACE::Rebind; |
60 | | |
61 | | void MultiplySum(const size_t xsize, |
62 | | const pixel_type* const JXL_RESTRICT row_in, |
63 | | const pixel_type* const JXL_RESTRICT row_in_Y, |
64 | 540k | const float factor, float* const JXL_RESTRICT row_out) { |
65 | 540k | const HWY_FULL(float) df; |
66 | 540k | const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float |
67 | 540k | const auto factor_v = Set(df, factor); |
68 | 6.52M | for (size_t x = 0; x < xsize; x += Lanes(di)) { |
69 | 5.98M | const auto in = Add(Load(di, row_in + x), Load(di, row_in_Y + x)); |
70 | 5.98M | const auto out = Mul(ConvertTo(df, in), factor_v); |
71 | 5.98M | Store(out, df, row_out + x); |
72 | 5.98M | } |
73 | 540k | } Unexecuted instantiation: jxl::N_SSE4::MultiplySum(unsigned long, int const*, int const*, float, float*) jxl::N_AVX2::MultiplySum(unsigned long, int const*, int const*, float, float*) Line | Count | Source | 64 | 540k | const float factor, float* const JXL_RESTRICT row_out) { | 65 | 540k | const HWY_FULL(float) df; | 66 | 540k | const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float | 67 | 540k | const auto factor_v = Set(df, factor); | 68 | 6.52M | for (size_t x = 0; x < xsize; x += Lanes(di)) { | 69 | 5.98M | const auto in = Add(Load(di, row_in + x), Load(di, row_in_Y + x)); | 70 | 5.98M | const auto out = Mul(ConvertTo(df, in), factor_v); | 71 | 5.98M | Store(out, df, row_out + x); | 72 | 5.98M | } | 73 | 540k | } |
Unexecuted instantiation: jxl::N_AVX3::MultiplySum(unsigned long, int const*, int const*, float, float*) Unexecuted instantiation: jxl::N_AVX3_ZEN4::MultiplySum(unsigned long, int const*, int const*, float, float*) Unexecuted instantiation: jxl::N_AVX3_SPR::MultiplySum(unsigned long, int const*, int const*, float, float*) Unexecuted instantiation: jxl::N_SSE2::MultiplySum(unsigned long, int const*, int const*, float, float*) |
74 | | |
75 | | void RgbFromSingle(const size_t xsize, |
76 | | const pixel_type* const JXL_RESTRICT row_in, |
77 | | const float factor, float* out_r, float* out_g, |
78 | 38.8k | float* out_b) { |
79 | 38.8k | const HWY_FULL(float) df; |
80 | 38.8k | const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float |
81 | | |
82 | 38.8k | const auto factor_v = Set(df, factor); |
83 | 597k | for (size_t x = 0; x < xsize; x += Lanes(di)) { |
84 | 558k | const auto in = Load(di, row_in + x); |
85 | 558k | const auto out = Mul(ConvertTo(df, in), factor_v); |
86 | 558k | Store(out, df, out_r + x); |
87 | 558k | Store(out, df, out_g + x); |
88 | 558k | Store(out, df, out_b + x); |
89 | 558k | } |
90 | 38.8k | } Unexecuted instantiation: jxl::N_SSE4::RgbFromSingle(unsigned long, int const*, float, float*, float*, float*) jxl::N_AVX2::RgbFromSingle(unsigned long, int const*, float, float*, float*, float*) Line | Count | Source | 78 | 38.8k | float* out_b) { | 79 | 38.8k | const HWY_FULL(float) df; | 80 | 38.8k | const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float | 81 | | | 82 | 38.8k | const auto factor_v = Set(df, factor); | 83 | 597k | for (size_t x = 0; x < xsize; x += Lanes(di)) { | 84 | 558k | const auto in = Load(di, row_in + x); | 85 | 558k | const auto out = Mul(ConvertTo(df, in), factor_v); | 86 | 558k | Store(out, df, out_r + x); | 87 | 558k | Store(out, df, out_g + x); | 88 | 558k | Store(out, df, out_b + x); | 89 | 558k | } | 90 | 38.8k | } |
Unexecuted instantiation: jxl::N_AVX3::RgbFromSingle(unsigned long, int const*, float, float*, float*, float*) Unexecuted instantiation: jxl::N_AVX3_ZEN4::RgbFromSingle(unsigned long, int const*, float, float*, float*, float*) Unexecuted instantiation: jxl::N_AVX3_SPR::RgbFromSingle(unsigned long, int const*, float, float*, float*, float*) Unexecuted instantiation: jxl::N_SSE2::RgbFromSingle(unsigned long, int const*, float, float*, float*, float*) |
91 | | |
92 | | void SingleFromSingle(const size_t xsize, |
93 | | const pixel_type* const JXL_RESTRICT row_in, |
94 | 2.97M | const float factor, float* row_out) { |
95 | 2.97M | const HWY_FULL(float) df; |
96 | 2.97M | const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float |
97 | | |
98 | 2.97M | const auto factor_v = Set(df, factor); |
99 | 30.9M | for (size_t x = 0; x < xsize; x += Lanes(di)) { |
100 | 27.9M | const auto in = Load(di, row_in + x); |
101 | 27.9M | const auto out = Mul(ConvertTo(df, in), factor_v); |
102 | 27.9M | Store(out, df, row_out + x); |
103 | 27.9M | } |
104 | 2.97M | } Unexecuted instantiation: jxl::N_SSE4::SingleFromSingle(unsigned long, int const*, float, float*) jxl::N_AVX2::SingleFromSingle(unsigned long, int const*, float, float*) Line | Count | Source | 94 | 2.97M | const float factor, float* row_out) { | 95 | 2.97M | const HWY_FULL(float) df; | 96 | 2.97M | const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float | 97 | | | 98 | 2.97M | const auto factor_v = Set(df, factor); | 99 | 30.9M | for (size_t x = 0; x < xsize; x += Lanes(di)) { | 100 | 27.9M | const auto in = Load(di, row_in + x); | 101 | 27.9M | const auto out = Mul(ConvertTo(df, in), factor_v); | 102 | 27.9M | Store(out, df, row_out + x); | 103 | 27.9M | } | 104 | 2.97M | } |
Unexecuted instantiation: jxl::N_AVX3::SingleFromSingle(unsigned long, int const*, float, float*) Unexecuted instantiation: jxl::N_AVX3_ZEN4::SingleFromSingle(unsigned long, int const*, float, float*) Unexecuted instantiation: jxl::N_AVX3_SPR::SingleFromSingle(unsigned long, int const*, float, float*) Unexecuted instantiation: jxl::N_SSE2::SingleFromSingle(unsigned long, int const*, float, float*) |
105 | | // NOLINTNEXTLINE(google-readability-namespace-comments) |
106 | | } // namespace HWY_NAMESPACE |
107 | | } // namespace jxl |
108 | | HWY_AFTER_NAMESPACE(); |
109 | | |
110 | | #if HWY_ONCE |
111 | | namespace jxl { |
112 | | HWY_EXPORT(MultiplySum); // Local function |
113 | | HWY_EXPORT(RgbFromSingle); // Local function |
114 | | HWY_EXPORT(SingleFromSingle); // Local function |
115 | | |
116 | | // Slow conversion using double precision multiplication, only |
117 | | // needed when the bit depth is too high for single precision |
118 | | void SingleFromSingleAccurate(const size_t xsize, |
119 | | const pixel_type* const JXL_RESTRICT row_in, |
120 | 4.37k | const double factor, float* row_out) { |
121 | 966k | for (size_t x = 0; x < xsize; x++) { |
122 | 962k | row_out[x] = row_in[x] * factor; |
123 | 962k | } |
124 | 4.37k | } |
125 | | |
126 | | // convert custom [bits]-bit float (with [exp_bits] exponent bits) stored as int |
127 | | // back to binary32 float |
128 | | Status int_to_float(const pixel_type* const JXL_RESTRICT row_in, |
129 | | float* const JXL_RESTRICT row_out, const size_t xsize, |
130 | 130k | const int bits, const int exp_bits) { |
131 | 130k | static_assert(sizeof(pixel_type) == sizeof(float), "32-bit input is assumed"); |
132 | 130k | if (bits == 32) { |
133 | 110k | JXL_ENSURE(exp_bits == 8); |
134 | 110k | memcpy(row_out, row_in, xsize * sizeof(float)); |
135 | 110k | return true; |
136 | 110k | } |
137 | 19.9k | int exp_bias = (1 << (exp_bits - 1)) - 1; |
138 | 19.9k | int sign_shift = bits - 1; |
139 | 19.9k | int mant_bits = bits - exp_bits - 1; |
140 | 19.9k | int mant_shift = 23 - mant_bits; |
141 | 1.51M | for (size_t x = 0; x < xsize; ++x) { |
142 | 1.49M | uint32_t f; |
143 | 1.49M | memcpy(&f, &row_in[x], 4); |
144 | 1.49M | int signbit = (f >> sign_shift); |
145 | 1.49M | f &= (1 << sign_shift) - 1; |
146 | 1.49M | if (f == 0) { |
147 | 844k | row_out[x] = (signbit ? -0.f : 0.f); |
148 | 844k | continue; |
149 | 844k | } |
150 | 652k | int exp = (f >> mant_bits); |
151 | 652k | int mantissa = (f & ((1 << mant_bits) - 1)); |
152 | 652k | if (exp == (1 << exp_bits) - 1) { |
153 | | // NaN or infinity |
154 | 145k | f = (signbit ? 0x80000000 : 0); |
155 | 145k | f |= 0b11111111 << 23; |
156 | 145k | f |= mantissa << mant_shift; |
157 | 145k | memcpy(&row_out[x], &f, 4); |
158 | 145k | continue; |
159 | 145k | } |
160 | 507k | mantissa <<= mant_shift; |
161 | | // Try to normalize only if there is space for maneuver. |
162 | 507k | if (exp == 0 && exp_bits < 8) { |
163 | | // subnormal number |
164 | 369k | while ((mantissa & 0x800000) == 0) { |
165 | 346k | mantissa <<= 1; |
166 | 346k | exp--; |
167 | 346k | } |
168 | 22.2k | exp++; |
169 | | // remove leading 1 because it is implicit now |
170 | 22.2k | mantissa &= 0x7fffff; |
171 | 22.2k | } |
172 | 507k | exp -= exp_bias; |
173 | | // broke up the arbitrary float into its parts, now reassemble into |
174 | | // binary32 |
175 | 507k | exp += 127; |
176 | 507k | JXL_ENSURE(exp >= 0); |
177 | 507k | f = (signbit ? 0x80000000 : 0); |
178 | 507k | f |= (exp << 23); |
179 | 507k | f |= mantissa; |
180 | 507k | memcpy(&row_out[x], &f, 4); |
181 | 507k | } |
182 | 19.9k | return true; |
183 | 19.9k | } |
184 | | |
185 | | #if JXL_DEBUG_V_LEVEL >= 1 |
186 | | std::string ModularStreamId::DebugString() const { |
187 | | std::ostringstream os; |
188 | | os << (kind == Kind::GlobalData ? "ModularGlobal" |
189 | | : kind == Kind::VarDCTDC ? "VarDCTDC" |
190 | | : kind == Kind::ModularDC ? "ModularDC" |
191 | | : kind == Kind::ACMetadata ? "ACMeta" |
192 | | : kind == Kind::QuantTable ? "QuantTable" |
193 | | : kind == Kind::ModularAC ? "ModularAC" |
194 | | : ""); |
195 | | if (kind == Kind::VarDCTDC || kind == Kind::ModularDC || |
196 | | kind == Kind::ACMetadata || kind == Kind::ModularAC) { |
197 | | os << " group " << group_id; |
198 | | } |
199 | | if (kind == Kind::ModularAC) { |
200 | | os << " pass " << pass_id; |
201 | | } |
202 | | if (kind == Kind::QuantTable) { |
203 | | os << " " << quant_table_id; |
204 | | } |
205 | | return os.str(); |
206 | | } |
207 | | #endif |
208 | | |
209 | | Status ModularFrameDecoder::DecodeGlobalInfo(BitReader* reader, |
210 | | const FrameHeader& frame_header, |
211 | 72.8k | bool allow_truncated_group) { |
212 | 72.8k | JxlMemoryManager* memory_manager = this->memory_manager(); |
213 | 72.8k | bool decode_color = frame_header.encoding == FrameEncoding::kModular; |
214 | 72.8k | const auto& metadata = frame_header.nonserialized_metadata->m; |
215 | 72.8k | bool is_gray = metadata.color_encoding.IsGray(); |
216 | 72.8k | size_t nb_chans = 3; |
217 | 72.8k | if (is_gray && frame_header.color_transform == ColorTransform::kNone) { |
218 | 1.60k | nb_chans = 1; |
219 | 1.60k | } |
220 | 72.8k | do_color = decode_color; |
221 | 72.8k | size_t nb_extra = metadata.extra_channel_info.size(); |
222 | 72.8k | bool has_tree = static_cast<bool>(reader->ReadBits(1)); |
223 | 72.8k | if (!allow_truncated_group || |
224 | 72.8k | reader->TotalBitsConsumed() < reader->TotalBytes() * kBitsPerByte) { |
225 | 72.8k | if (has_tree) { |
226 | 21.4k | size_t tree_size_limit = |
227 | 21.4k | std::min(static_cast<size_t>(1 << 22), |
228 | 21.4k | 1024 + frame_dim.xsize * frame_dim.ysize * |
229 | 21.4k | (nb_chans + nb_extra) / 16); |
230 | 21.4k | JXL_RETURN_IF_ERROR( |
231 | 21.4k | DecodeTree(memory_manager, reader, &tree, tree_size_limit)); |
232 | 11.9k | JXL_RETURN_IF_ERROR(DecodeHistograms( |
233 | 11.9k | memory_manager, reader, (tree.size() + 1) / 2, &code, &context_map)); |
234 | 11.9k | } |
235 | 72.8k | } |
236 | 63.1k | if (!do_color) nb_chans = 0; |
237 | | |
238 | 63.1k | bool fp = metadata.bit_depth.floating_point_sample; |
239 | | |
240 | | // bits_per_sample is just metadata for XYB images. |
241 | 63.1k | if (metadata.bit_depth.bits_per_sample >= 32 && do_color && |
242 | 4.65k | frame_header.color_transform != ColorTransform::kXYB) { |
243 | 4.65k | if (metadata.bit_depth.bits_per_sample == 32 && fp == false) { |
244 | 0 | return JXL_FAILURE("uint32_t not supported in dec_modular"); |
245 | 4.65k | } else if (metadata.bit_depth.bits_per_sample > 32) { |
246 | 0 | return JXL_FAILURE("bits_per_sample > 32 not supported"); |
247 | 0 | } |
248 | 4.65k | } |
249 | | |
250 | 126k | JXL_ASSIGN_OR_RETURN( |
251 | 126k | Image gi, |
252 | 126k | Image::Create(memory_manager, frame_dim.xsize, frame_dim.ysize, |
253 | 126k | metadata.bit_depth.bits_per_sample, nb_chans + nb_extra)); |
254 | | |
255 | 126k | all_same_shift = true; |
256 | 126k | if (frame_header.color_transform == ColorTransform::kYCbCr) { |
257 | 26.7k | for (size_t c = 0; c < nb_chans; c++) { |
258 | 7.24k | gi.channel[c].hshift = frame_header.chroma_subsampling.HShift(c); |
259 | 7.24k | gi.channel[c].vshift = frame_header.chroma_subsampling.VShift(c); |
260 | 7.24k | size_t xsize_shifted = |
261 | 7.24k | DivCeil(frame_dim.xsize, 1 << gi.channel[c].hshift); |
262 | 7.24k | size_t ysize_shifted = |
263 | 7.24k | DivCeil(frame_dim.ysize, 1 << gi.channel[c].vshift); |
264 | 7.24k | JXL_RETURN_IF_ERROR(gi.channel[c].shrink(xsize_shifted, ysize_shifted)); |
265 | 7.24k | if (gi.channel[c].hshift != gi.channel[0].hshift || |
266 | 4.40k | gi.channel[c].vshift != gi.channel[0].vshift) |
267 | 3.97k | all_same_shift = false; |
268 | 7.24k | } |
269 | 19.4k | } |
270 | | |
271 | 76.8k | for (size_t ec = 0, c = nb_chans; ec < nb_extra; ec++, c++) { |
272 | 13.6k | size_t ecups = frame_header.extra_channel_upsampling[ec]; |
273 | 13.6k | JXL_RETURN_IF_ERROR( |
274 | 13.6k | gi.channel[c].shrink(DivCeil(frame_dim.xsize_upsampled, ecups), |
275 | 13.6k | DivCeil(frame_dim.ysize_upsampled, ecups))); |
276 | 13.6k | gi.channel[c].hshift = gi.channel[c].vshift = |
277 | 13.6k | CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling); |
278 | 13.6k | if (gi.channel[c].hshift != gi.channel[0].hshift || |
279 | 9.39k | gi.channel[c].vshift != gi.channel[0].vshift) |
280 | 4.70k | all_same_shift = false; |
281 | 13.6k | } |
282 | | |
283 | 63.1k | JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (w/o transforms) %s", |
284 | 63.1k | gi.DebugString().c_str()); |
285 | 63.1k | ModularOptions options; |
286 | 63.1k | options.max_chan_size = frame_dim.group_dim; |
287 | 63.1k | options.group_dim = frame_dim.group_dim; |
288 | 63.1k | Status dec_status = ModularGenericDecompress( |
289 | 63.1k | reader, gi, &global_header, ModularStreamId::Global().ID(frame_dim), |
290 | 63.1k | &options, |
291 | 63.1k | /*undo_transforms=*/false, &tree, &code, &context_map, |
292 | 63.1k | allow_truncated_group); |
293 | 63.1k | if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status); |
294 | 59.2k | if (dec_status.IsFatalError()) { |
295 | 0 | return JXL_FAILURE("Failed to decode global modular info"); |
296 | 0 | } |
297 | | |
298 | | // TODO(eustas): are we sure this can be done after partial decode? |
299 | 59.2k | have_something = false; |
300 | 238k | for (size_t c = 0; c < gi.channel.size(); c++) { |
301 | 179k | Channel& gic = gi.channel[c]; |
302 | 179k | if (c >= gi.nb_meta_channels && gic.w <= frame_dim.group_dim && |
303 | 176k | gic.h <= frame_dim.group_dim) |
304 | 174k | have_something = true; |
305 | 179k | } |
306 | | // move global transforms to groups if possible |
307 | 59.2k | if (!have_something && all_same_shift) { |
308 | 31.4k | if (gi.transform.size() == 1 && gi.transform[0].id == TransformId::kRCT) { |
309 | 43 | global_transform = gi.transform; |
310 | 43 | gi.transform.clear(); |
311 | | // TODO(jon): also move no-delta-palette out (trickier though) |
312 | 43 | } |
313 | 31.4k | } |
314 | 59.2k | full_image = std::move(gi); |
315 | 59.2k | JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (with transforms) %s", |
316 | 59.2k | full_image.DebugString().c_str()); |
317 | 59.2k | return dec_status; |
318 | 59.2k | } |
319 | | |
320 | 32.4k | void ModularFrameDecoder::MaybeDropFullImage() { |
321 | 32.4k | if (full_image.transform.empty() && !have_something && all_same_shift) { |
322 | 4.62k | use_full_image = false; |
323 | 4.62k | JXL_DEBUG_V(6, "Dropping full image"); |
324 | 4.62k | for (auto& ch : full_image.channel) { |
325 | | // keep metadata on channels around, but dealloc their planes |
326 | 1.04k | ch.plane = Plane<pixel_type>(); |
327 | 1.04k | } |
328 | 4.62k | } |
329 | 32.4k | } |
330 | | |
331 | | Status ModularFrameDecoder::DecodeGroup( |
332 | | const FrameHeader& frame_header, const Rect& rect, BitReader* reader, |
333 | | int minShift, int maxShift, const ModularStreamId& stream, bool zerofill, |
334 | | PassesDecoderState* dec_state, RenderPipelineInput* render_pipeline_input, |
335 | 81.8k | bool allow_truncated, bool* should_run_pipeline) { |
336 | 81.8k | JXL_DEBUG_V(6, "Decoding %s with rect %s and shift bracket %d..%d %s", |
337 | 81.8k | stream.DebugString().c_str(), Description(rect).c_str(), minShift, |
338 | 81.8k | maxShift, zerofill ? "using zerofill" : ""); |
339 | 81.8k | JXL_ENSURE(stream.kind == ModularStreamId::Kind::ModularDC || |
340 | 81.8k | stream.kind == ModularStreamId::Kind::ModularAC); |
341 | 81.8k | const size_t xsize = rect.xsize(); |
342 | 81.8k | const size_t ysize = rect.ysize(); |
343 | 81.8k | JXL_ASSIGN_OR_RETURN(Image gi, Image::Create(memory_manager_, xsize, ysize, |
344 | 81.8k | full_image.bitdepth, 0)); |
345 | | // start at the first bigger-than-groupsize non-metachannel |
346 | 81.8k | size_t c = full_image.nb_meta_channels; |
347 | 510k | for (; c < full_image.channel.size(); c++) { |
348 | 436k | Channel& fc = full_image.channel[c]; |
349 | 436k | if (fc.w > frame_dim.group_dim || fc.h > frame_dim.group_dim) break; |
350 | 436k | } |
351 | 81.8k | size_t beginc = c; |
352 | 104k | for (; c < full_image.channel.size(); c++) { |
353 | 23.0k | Channel& fc = full_image.channel[c]; |
354 | 23.0k | int shift = std::min(fc.hshift, fc.vshift); |
355 | 23.0k | if (shift > maxShift) continue; |
356 | 23.0k | if (shift < minShift) continue; |
357 | 13.1k | Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift, |
358 | 13.1k | rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h); |
359 | 13.1k | if (r.xsize() == 0 || r.ysize() == 0) continue; |
360 | 12.7k | if (zerofill && use_full_image) { |
361 | 0 | for (size_t y = 0; y < r.ysize(); ++y) { |
362 | 0 | pixel_type* const JXL_RESTRICT row_out = r.Row(&fc.plane, y); |
363 | 0 | memset(row_out, 0, r.xsize() * sizeof(*row_out)); |
364 | 0 | } |
365 | 12.7k | } else { |
366 | 12.7k | JXL_ASSIGN_OR_RETURN( |
367 | 12.7k | Channel gc, Channel::Create(memory_manager_, r.xsize(), r.ysize())); |
368 | 12.7k | if (zerofill) ZeroFillImage(&gc.plane); |
369 | 12.7k | gc.hshift = fc.hshift; |
370 | 12.7k | gc.vshift = fc.vshift; |
371 | 12.7k | gi.channel.emplace_back(std::move(gc)); |
372 | 12.7k | } |
373 | 12.7k | } |
374 | 81.8k | if (zerofill && use_full_image) return true; |
375 | | // Return early if there's nothing to decode. Otherwise there might be |
376 | | // problems later (in ModularImageToDecodedRect). |
377 | 81.8k | if (gi.channel.empty()) { |
378 | 77.2k | if (dec_state && should_run_pipeline) { |
379 | 39.5k | const auto* metadata = frame_header.nonserialized_metadata; |
380 | 39.5k | if (do_color || metadata->m.num_extra_channels > 0) { |
381 | | // Signal to FrameDecoder that we do not have some of the required input |
382 | | // for the render pipeline. |
383 | 31.1k | *should_run_pipeline = false; |
384 | 31.1k | } |
385 | 39.5k | } |
386 | 77.2k | JXL_DEBUG_V(6, "Nothing to decode, returning early."); |
387 | 77.2k | return true; |
388 | 77.2k | } |
389 | 4.66k | ModularOptions options; |
390 | 4.66k | if (!zerofill) { |
391 | 4.66k | auto status = ModularGenericDecompress( |
392 | 4.66k | reader, gi, /*header=*/nullptr, stream.ID(frame_dim), &options, |
393 | 4.66k | /*undo_transforms=*/true, &tree, &code, &context_map, allow_truncated); |
394 | 4.66k | if (!allow_truncated) JXL_RETURN_IF_ERROR(status); |
395 | 4.43k | if (status.IsFatalError()) return status; |
396 | 4.43k | } |
397 | | // Undo global transforms that have been pushed to the group level |
398 | 4.43k | if (!use_full_image) { |
399 | 3.63k | JXL_ENSURE(render_pipeline_input); |
400 | 3.63k | for (const auto& t : global_transform) { |
401 | 355 | JXL_RETURN_IF_ERROR(t.Inverse(gi, global_header.wp_header)); |
402 | 355 | } |
403 | 3.63k | JXL_RETURN_IF_ERROR(ModularImageToDecodedRect( |
404 | 3.63k | frame_header, gi, dec_state, nullptr, *render_pipeline_input, |
405 | 3.63k | Rect(0, 0, gi.w, gi.h))); |
406 | 3.63k | return true; |
407 | 3.63k | } |
408 | 800 | int gic = 0; |
409 | 4.72k | for (c = beginc; c < full_image.channel.size(); c++) { |
410 | 3.92k | Channel& fc = full_image.channel[c]; |
411 | 3.92k | int shift = std::min(fc.hshift, fc.vshift); |
412 | 3.92k | if (shift > maxShift) continue; |
413 | 3.90k | if (shift < minShift) continue; |
414 | 3.70k | Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift, |
415 | 3.70k | rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h); |
416 | 3.70k | if (r.xsize() == 0 || r.ysize() == 0) continue; |
417 | 3.45k | JXL_ENSURE(use_full_image); |
418 | 3.45k | JXL_RETURN_IF_ERROR( |
419 | 3.45k | CopyImageTo(/*rect_from=*/Rect(0, 0, r.xsize(), r.ysize()), |
420 | 3.45k | /*from=*/gi.channel[gic].plane, |
421 | 3.45k | /*rect_to=*/r, /*to=*/&fc.plane)); |
422 | 3.45k | gic++; |
423 | 3.45k | } |
424 | 800 | return true; |
425 | 800 | } |
426 | | |
427 | | Status ModularFrameDecoder::DecodeVarDCTDC(const FrameHeader& frame_header, |
428 | | size_t group_id, BitReader* reader, |
429 | 31.6k | PassesDecoderState* dec_state) { |
430 | 31.6k | JxlMemoryManager* memory_manager = dec_state->memory_manager(); |
431 | 31.6k | const Rect r = dec_state->shared->frame_dim.DCGroupRect(group_id); |
432 | 31.6k | JXL_DEBUG_V(6, "Decoding VarDCT DC with rect %s", Description(r).c_str()); |
433 | | // TODO(eustas): investigate if we could reduce the impact of |
434 | | // EvalRationalPolynomial; generally speaking, the limit is |
435 | | // 2**(128/(3*magic)), where 128 comes from IEEE 754 exponent, |
436 | | // 3 comes from XybToRgb that cubes the values, and "magic" is |
437 | | // the sum of all other contributions. 2**18 is known to lead |
438 | | // to NaN on input found by fuzzing (see commit message). |
439 | 31.6k | JXL_ASSIGN_OR_RETURN(Image image, |
440 | 31.6k | Image::Create(memory_manager, r.xsize(), r.ysize(), |
441 | 31.6k | full_image.bitdepth, 3)); |
442 | 31.6k | size_t stream_id = ModularStreamId::VarDCTDC(group_id).ID(frame_dim); |
443 | 31.6k | reader->Refill(); |
444 | 31.6k | size_t extra_precision = reader->ReadFixedBits<2>(); |
445 | 31.6k | float mul = 1.0f / (1 << extra_precision); |
446 | 31.6k | ModularOptions options; |
447 | 126k | for (size_t c = 0; c < 3; c++) { |
448 | 94.9k | Channel& ch = image.channel[c < 2 ? c ^ 1 : c]; |
449 | 94.9k | ch.w >>= frame_header.chroma_subsampling.HShift(c); |
450 | 94.9k | ch.h >>= frame_header.chroma_subsampling.VShift(c); |
451 | 94.9k | JXL_RETURN_IF_ERROR(ch.shrink()); |
452 | 94.9k | } |
453 | 31.6k | if (!ModularGenericDecompress( |
454 | 31.6k | reader, image, /*header=*/nullptr, stream_id, &options, |
455 | 31.6k | /*undo_transforms=*/true, &tree, &code, &context_map)) { |
456 | 23.2k | return JXL_FAILURE("Failed to decode VarDCT DC group (DC group id %d)", |
457 | 23.2k | static_cast<int>(group_id)); |
458 | 23.2k | } |
459 | 8.45k | DequantDC(r, &dec_state->shared_storage.dc_storage, |
460 | 8.45k | &dec_state->shared_storage.quant_dc, image, |
461 | 8.45k | dec_state->shared->quantizer.MulDC(), mul, |
462 | 8.45k | dec_state->shared->cmap.base().DCFactors(), |
463 | 8.45k | frame_header.chroma_subsampling, dec_state->shared->block_ctx_map); |
464 | 8.45k | return true; |
465 | 31.6k | } |
466 | | |
467 | | Status ModularFrameDecoder::DecodeAcMetadata(const FrameHeader& frame_header, |
468 | | size_t group_id, BitReader* reader, |
469 | 8.45k | PassesDecoderState* dec_state) { |
470 | 8.45k | JxlMemoryManager* memory_manager = dec_state->memory_manager(); |
471 | 8.45k | const Rect r = dec_state->shared->frame_dim.DCGroupRect(group_id); |
472 | 8.45k | JXL_DEBUG_V(6, "Decoding AcMetadata with rect %s", Description(r).c_str()); |
473 | 8.45k | size_t upper_bound = r.xsize() * r.ysize(); |
474 | 8.45k | reader->Refill(); |
475 | 8.45k | size_t count = reader->ReadBits(CeilLog2Nonzero(upper_bound)) + 1; |
476 | 8.45k | size_t stream_id = ModularStreamId::ACMetadata(group_id).ID(frame_dim); |
477 | | // YToX, YToB, ACS + QF, EPF |
478 | 8.45k | JXL_ASSIGN_OR_RETURN(Image image, |
479 | 8.45k | Image::Create(memory_manager, r.xsize(), r.ysize(), |
480 | 8.45k | full_image.bitdepth, 4)); |
481 | 8.45k | static_assert(kColorTileDimInBlocks == 8, "Color tile size changed"); |
482 | 8.45k | Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3); |
483 | 8.45k | JXL_ASSIGN_OR_RETURN( |
484 | 8.45k | image.channel[0], |
485 | 8.45k | Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3)); |
486 | 8.45k | JXL_ASSIGN_OR_RETURN( |
487 | 8.45k | image.channel[1], |
488 | 8.45k | Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3)); |
489 | 8.45k | JXL_ASSIGN_OR_RETURN(image.channel[2], |
490 | 8.45k | Channel::Create(memory_manager, count, 2, 0, 0)); |
491 | 8.45k | ModularOptions options; |
492 | 8.45k | if (!ModularGenericDecompress( |
493 | 8.45k | reader, image, /*header=*/nullptr, stream_id, &options, |
494 | 8.45k | /*undo_transforms=*/true, &tree, &code, &context_map)) { |
495 | 2.92k | return JXL_FAILURE("Failed to decode AC metadata"); |
496 | 2.92k | } |
497 | 5.53k | JXL_RETURN_IF_ERROR( |
498 | 5.53k | ConvertPlaneAndClamp(Rect(image.channel[0].plane), image.channel[0].plane, |
499 | 5.53k | cr, &dec_state->shared_storage.cmap.ytox_map)); |
500 | 5.53k | JXL_RETURN_IF_ERROR( |
501 | 5.53k | ConvertPlaneAndClamp(Rect(image.channel[1].plane), image.channel[1].plane, |
502 | 5.53k | cr, &dec_state->shared_storage.cmap.ytob_map)); |
503 | 5.53k | size_t num = 0; |
504 | 5.53k | bool is444 = frame_header.chroma_subsampling.Is444(); |
505 | 5.53k | auto& ac_strategy = dec_state->shared_storage.ac_strategy; |
506 | 5.53k | size_t xlim = std::min(ac_strategy.xsize(), r.x0() + r.xsize()); |
507 | 5.53k | size_t ylim = std::min(ac_strategy.ysize(), r.y0() + r.ysize()); |
508 | 5.53k | uint32_t local_used_acs = 0; |
509 | 157k | for (size_t iy = 0; iy < r.ysize(); iy++) { |
510 | 152k | size_t y = r.y0() + iy; |
511 | 152k | int32_t* row_qf = r.Row(&dec_state->shared_storage.raw_quant_field, iy); |
512 | 152k | uint8_t* row_epf = r.Row(&dec_state->shared_storage.epf_sharpness, iy); |
513 | 152k | int32_t* row_in_1 = image.channel[2].plane.Row(0); |
514 | 152k | int32_t* row_in_2 = image.channel[2].plane.Row(1); |
515 | 152k | int32_t* row_in_3 = image.channel[3].plane.Row(iy); |
516 | 2.54M | for (size_t ix = 0; ix < r.xsize(); ix++) { |
517 | 2.39M | size_t x = r.x0() + ix; |
518 | 2.39M | int sharpness = row_in_3[ix]; |
519 | 2.39M | if (sharpness < 0 || sharpness >= LoopFilter::kEpfSharpEntries) { |
520 | 178 | return JXL_FAILURE("Corrupted sharpness field"); |
521 | 178 | } |
522 | 2.39M | row_epf[ix] = sharpness; |
523 | 2.39M | if (ac_strategy.IsValid(x, y)) { |
524 | 1.21M | continue; |
525 | 1.21M | } |
526 | | |
527 | 1.17M | if (num >= count) return JXL_FAILURE("Corrupted stream"); |
528 | | |
529 | 1.17M | if (!AcStrategy::IsRawStrategyValid(row_in_1[num])) { |
530 | 56 | return JXL_FAILURE("Invalid AC strategy"); |
531 | 56 | } |
532 | 1.17M | local_used_acs |= 1u << row_in_1[num]; |
533 | 1.17M | AcStrategy acs = AcStrategy::FromRawStrategy(row_in_1[num]); |
534 | 1.17M | if ((acs.covered_blocks_x() > 1 || acs.covered_blocks_y() > 1) && |
535 | 248k | !is444) { |
536 | 4 | return JXL_FAILURE( |
537 | 4 | "AC strategy not compatible with chroma subsampling"); |
538 | 4 | } |
539 | | // Ensure that blocks do not overflow *AC* groups. |
540 | 1.17M | size_t next_x_ac_block = (x / kGroupDimInBlocks + 1) * kGroupDimInBlocks; |
541 | 1.17M | size_t next_y_ac_block = (y / kGroupDimInBlocks + 1) * kGroupDimInBlocks; |
542 | 1.17M | size_t next_x_dct_block = x + acs.covered_blocks_x(); |
543 | 1.17M | size_t next_y_dct_block = y + acs.covered_blocks_y(); |
544 | 1.17M | if (next_x_dct_block > next_x_ac_block || next_x_dct_block > xlim) { |
545 | 21 | return JXL_FAILURE("Invalid AC strategy, x overflow"); |
546 | 21 | } |
547 | 1.17M | if (next_y_dct_block > next_y_ac_block || next_y_dct_block > ylim) { |
548 | 10 | return JXL_FAILURE("Invalid AC strategy, y overflow"); |
549 | 10 | } |
550 | 1.17M | JXL_RETURN_IF_ERROR( |
551 | 1.17M | ac_strategy.SetNoBoundsCheck(x, y, AcStrategyType(row_in_1[num]))); |
552 | 1.17M | row_qf[ix] = 1 + std::max<int32_t>(0, std::min(Quantizer::kQuantMax - 1, |
553 | 1.17M | row_in_2[num])); |
554 | 1.17M | num++; |
555 | 1.17M | } |
556 | 152k | } |
557 | 5.13k | dec_state->used_acs |= local_used_acs; |
558 | 5.13k | if (frame_header.loop_filter.epf_iters > 0) { |
559 | 3.93k | JXL_RETURN_IF_ERROR(ComputeSigma(frame_header.loop_filter, r, dec_state)); |
560 | 3.93k | } |
561 | 5.13k | return true; |
562 | 5.13k | } |
563 | | |
564 | | Status ModularFrameDecoder::ModularImageToDecodedRect( |
565 | | const FrameHeader& frame_header, Image& gi, PassesDecoderState* dec_state, |
566 | | jxl::ThreadPool* pool, RenderPipelineInput& render_pipeline_input, |
567 | 31.8k | Rect modular_rect) const { |
568 | 31.8k | const auto* metadata = frame_header.nonserialized_metadata; |
569 | 31.8k | JXL_ENSURE(gi.transform.empty()); |
570 | | |
571 | 3.43M | auto get_row = [&](size_t c, size_t y) { |
572 | 3.43M | const auto& buffer = render_pipeline_input.GetBuffer(c); |
573 | 3.43M | return buffer.second.Row(buffer.first, y); |
574 | 3.43M | }; |
575 | | |
576 | 31.8k | size_t c = 0; |
577 | 31.8k | if (do_color) { |
578 | 30.2k | const bool rgb_from_gray = |
579 | 30.2k | metadata->m.color_encoding.IsGray() && |
580 | 1.64k | frame_header.color_transform == ColorTransform::kNone; |
581 | 30.2k | const bool fp = metadata->m.bit_depth.floating_point_sample && |
582 | 4.82k | frame_header.color_transform != ColorTransform::kXYB; |
583 | 116k | for (; c < 3; c++) { |
584 | 87.6k | double factor = full_image.bitdepth < 32 |
585 | 87.6k | ? 1.0 / ((1u << full_image.bitdepth) - 1) |
586 | 87.6k | : 0; |
587 | 87.6k | size_t c_in = c; |
588 | 87.6k | if (frame_header.color_transform == ColorTransform::kXYB) { |
589 | 40.0k | factor = dec_state->shared->matrices.DCQuants()[c]; |
590 | | // XYB is encoded as YX(B-Y) |
591 | 40.0k | if (c < 2) c_in = 1 - c; |
592 | 47.5k | } else if (rgb_from_gray) { |
593 | 1.58k | c_in = 0; |
594 | 1.58k | } |
595 | 87.6k | JXL_ENSURE(c_in < gi.channel.size()); |
596 | 87.6k | Channel& ch_in = gi.channel[c_in]; |
597 | | // TODO(eustas): could we detect it on earlier stage? |
598 | 87.6k | if (ch_in.w == 0 || ch_in.h == 0) { |
599 | 0 | return JXL_FAILURE("Empty image"); |
600 | 0 | } |
601 | 87.6k | JXL_ENSURE(ch_in.hshift <= 3 && ch_in.vshift <= 3); |
602 | 87.6k | Rect r = render_pipeline_input.GetBuffer(c).second; |
603 | 87.6k | Rect mr(modular_rect.x0() >> ch_in.hshift, |
604 | 87.6k | modular_rect.y0() >> ch_in.vshift, |
605 | 87.6k | DivCeil(modular_rect.xsize(), 1 << ch_in.hshift), |
606 | 87.6k | DivCeil(modular_rect.ysize(), 1 << ch_in.vshift)); |
607 | 87.6k | mr = mr.Crop(ch_in.plane); |
608 | 87.6k | size_t xsize_shifted = r.xsize(); |
609 | 87.6k | size_t ysize_shifted = r.ysize(); |
610 | 87.6k | if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) { |
611 | 0 | return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS |
612 | 0 | "x%" PRIuS |
613 | 0 | " modular channel into " |
614 | 0 | "a %" PRIuS "x%" PRIuS " rect", |
615 | 0 | mr.xsize(), mr.ysize(), r.xsize(), r.ysize()); |
616 | 0 | } |
617 | 87.6k | if (frame_header.color_transform == ColorTransform::kXYB && c == 2) { |
618 | 13.3k | JXL_ENSURE(!fp); |
619 | 13.3k | const auto process_row = [&](const uint32_t task, |
620 | 540k | size_t /* thread */) -> Status { |
621 | 540k | const size_t y = task; |
622 | 540k | const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y); |
623 | 540k | const pixel_type* const JXL_RESTRICT row_in_Y = |
624 | 540k | mr.Row(&gi.channel[0].plane, y); |
625 | 540k | float* const JXL_RESTRICT row_out = get_row(c, y); |
626 | 540k | HWY_DYNAMIC_DISPATCH(MultiplySum) |
627 | 540k | (xsize_shifted, row_in, row_in_Y, factor, row_out); |
628 | 540k | return true; |
629 | 540k | }; |
630 | 13.3k | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted, |
631 | 13.3k | ThreadPool::NoInit, process_row, |
632 | 13.3k | "ModularIntToFloat")); |
633 | 74.2k | } else if (fp) { |
634 | 14.1k | int bits = metadata->m.bit_depth.bits_per_sample; |
635 | 14.1k | int exp_bits = metadata->m.bit_depth.exponent_bits_per_sample; |
636 | 14.1k | const auto process_row = [&](const uint32_t task, |
637 | 113k | size_t /* thread */) -> Status { |
638 | 113k | const size_t y = task; |
639 | 113k | const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y); |
640 | 113k | if (rgb_from_gray) { |
641 | 5.58k | for (size_t cc = 0; cc < 3; cc++) { |
642 | 4.19k | float* const JXL_RESTRICT row_out = get_row(cc, y); |
643 | 4.19k | JXL_RETURN_IF_ERROR( |
644 | 4.19k | int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits)); |
645 | 4.19k | } |
646 | 112k | } else { |
647 | 112k | float* const JXL_RESTRICT row_out = get_row(c, y); |
648 | 112k | JXL_RETURN_IF_ERROR( |
649 | 112k | int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits)); |
650 | 112k | } |
651 | 113k | return true; |
652 | 113k | }; |
653 | 14.1k | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted, |
654 | 14.1k | ThreadPool::NoInit, process_row, |
655 | 14.1k | "ModularIntToFloat_losslessfloat")); |
656 | 60.1k | } else { |
657 | 60.1k | const auto process_row = [&](const uint32_t task, |
658 | 2.70M | size_t /* thread */) -> Status { |
659 | 2.70M | const size_t y = task; |
660 | 2.70M | const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y); |
661 | 2.70M | if (rgb_from_gray) { |
662 | 38.8k | if (full_image.bitdepth < 23) { |
663 | 38.8k | HWY_DYNAMIC_DISPATCH(RgbFromSingle) |
664 | 38.8k | (xsize_shifted, row_in, factor, get_row(0, y), get_row(1, y), |
665 | 38.8k | get_row(2, y)); |
666 | 38.8k | } else { |
667 | 0 | SingleFromSingleAccurate(xsize_shifted, row_in, factor, |
668 | 0 | get_row(0, y)); |
669 | 0 | SingleFromSingleAccurate(xsize_shifted, row_in, factor, |
670 | 0 | get_row(1, y)); |
671 | 0 | SingleFromSingleAccurate(xsize_shifted, row_in, factor, |
672 | 0 | get_row(2, y)); |
673 | 0 | } |
674 | 2.66M | } else { |
675 | 2.66M | float* const JXL_RESTRICT row_out = get_row(c, y); |
676 | 2.66M | if (full_image.bitdepth < 23) { |
677 | 2.66M | HWY_DYNAMIC_DISPATCH(SingleFromSingle) |
678 | 2.66M | (xsize_shifted, row_in, factor, row_out); |
679 | 2.66M | } else { |
680 | 3.79k | SingleFromSingleAccurate(xsize_shifted, row_in, factor, row_out); |
681 | 3.79k | } |
682 | 2.66M | } |
683 | 2.70M | return true; |
684 | 2.70M | }; |
685 | 60.1k | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted, |
686 | 60.1k | ThreadPool::NoInit, process_row, |
687 | 60.1k | "ModularIntToFloat")); |
688 | 60.1k | } |
689 | 87.6k | if (rgb_from_gray) { |
690 | 1.58k | break; |
691 | 1.58k | } |
692 | 87.6k | } |
693 | 30.2k | if (rgb_from_gray) { |
694 | 1.58k | c = 1; |
695 | 1.58k | } |
696 | 30.2k | } |
697 | 31.8k | size_t num_extra_channels = metadata->m.num_extra_channels; |
698 | 42.6k | for (size_t ec = 0; ec < num_extra_channels; ec++, c++) { |
699 | 10.8k | const ExtraChannelInfo& eci = metadata->m.extra_channel_info[ec]; |
700 | 10.8k | int bits = eci.bit_depth.bits_per_sample; |
701 | 10.8k | int exp_bits = eci.bit_depth.exponent_bits_per_sample; |
702 | 10.8k | bool fp = eci.bit_depth.floating_point_sample; |
703 | 10.8k | JXL_ENSURE(fp || bits < 32); |
704 | 10.8k | const double factor = fp ? 0 : (1.0 / ((1u << bits) - 1)); |
705 | 10.8k | JXL_ENSURE(c < gi.channel.size()); |
706 | 10.8k | Channel& ch_in = gi.channel[c]; |
707 | 10.8k | const auto& buffer = render_pipeline_input.GetBuffer(3 + ec); |
708 | 10.8k | Rect r = buffer.second; |
709 | 10.8k | Rect mr(modular_rect.x0() >> ch_in.hshift, |
710 | 10.8k | modular_rect.y0() >> ch_in.vshift, |
711 | 10.8k | DivCeil(modular_rect.xsize(), 1 << ch_in.hshift), |
712 | 10.8k | DivCeil(modular_rect.ysize(), 1 << ch_in.vshift)); |
713 | 10.8k | mr = mr.Crop(ch_in.plane); |
714 | 10.8k | if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) { |
715 | 0 | return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS |
716 | 0 | "x%" PRIuS |
717 | 0 | " modular channel into " |
718 | 0 | "a %" PRIuS "x%" PRIuS " rect", |
719 | 0 | mr.xsize(), mr.ysize(), r.xsize(), r.ysize()); |
720 | 0 | } |
721 | 339k | for (size_t y = 0; y < r.ysize(); ++y) { |
722 | 328k | float* const JXL_RESTRICT row_out = r.Row(buffer.first, y); |
723 | 328k | const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y); |
724 | 328k | if (fp) { |
725 | 13.6k | JXL_RETURN_IF_ERROR( |
726 | 13.6k | int_to_float(row_in, row_out, r.xsize(), bits, exp_bits)); |
727 | 315k | } else { |
728 | 315k | if (full_image.bitdepth < 23) { |
729 | 314k | HWY_DYNAMIC_DISPATCH(SingleFromSingle) |
730 | 314k | (r.xsize(), row_in, factor, row_out); |
731 | 314k | } else { |
732 | 581 | SingleFromSingleAccurate(r.xsize(), row_in, factor, row_out); |
733 | 581 | } |
734 | 315k | } |
735 | 328k | } |
736 | 10.8k | } |
737 | 31.8k | return true; |
738 | 31.8k | } |
739 | | |
740 | | Status ModularFrameDecoder::FinalizeDecoding(const FrameHeader& frame_header, |
741 | | PassesDecoderState* dec_state, |
742 | | jxl::ThreadPool* pool, |
743 | 29.6k | bool inplace) { |
744 | 29.6k | if (!use_full_image) return true; |
745 | 27.5k | JxlMemoryManager* memory_manager = dec_state->memory_manager(); |
746 | 27.5k | Image gi{memory_manager}; |
747 | 27.5k | if (inplace) { |
748 | 27.5k | gi = std::move(full_image); |
749 | 27.5k | } else { |
750 | 0 | JXL_ASSIGN_OR_RETURN(gi, Image::Clone(full_image)); |
751 | 0 | } |
752 | 27.5k | size_t xsize = gi.w; |
753 | 27.5k | size_t ysize = gi.h; |
754 | | |
755 | 27.5k | JXL_DEBUG_V(3, "Finalizing decoding for modular image: %s", |
756 | 27.5k | gi.DebugString().c_str()); |
757 | | |
758 | | // Don't use threads if total image size is smaller than a group |
759 | 27.5k | if (xsize * ysize < frame_dim.group_dim * frame_dim.group_dim) pool = nullptr; |
760 | | |
761 | | // Undo the global transforms |
762 | 27.5k | gi.undo_transforms(global_header.wp_header, pool); |
763 | 27.5k | JXL_ENSURE(global_transform.empty()); |
764 | 27.5k | if (gi.error) return JXL_FAILURE("Undoing transforms failed"); |
765 | | |
766 | 55.6k | for (size_t i = 0; i < dec_state->shared->frame_dim.num_groups; i++) { |
767 | 28.1k | dec_state->render_pipeline->ClearDone(i); |
768 | 28.1k | } |
769 | | |
770 | 27.5k | const auto init = [&](size_t num_threads) -> Status { |
771 | 27.5k | bool use_group_ids = (frame_header.encoding == FrameEncoding::kVarDCT || |
772 | 27.4k | (frame_header.flags & FrameHeader::kNoise)); |
773 | 27.5k | JXL_RETURN_IF_ERROR(dec_state->render_pipeline->PrepareForThreads( |
774 | 27.5k | num_threads, use_group_ids)); |
775 | 27.5k | return true; |
776 | 27.5k | }; |
777 | 27.5k | const auto process_group = [&](const uint32_t group, |
778 | 28.1k | size_t thread_id) -> Status { |
779 | 28.1k | RenderPipelineInput input = |
780 | 28.1k | dec_state->render_pipeline->GetInputBuffers(group, thread_id); |
781 | 28.1k | JXL_RETURN_IF_ERROR(ModularImageToDecodedRect( |
782 | 28.1k | frame_header, gi, dec_state, nullptr, input, |
783 | 28.1k | dec_state->shared->frame_dim.GroupRect(group))); |
784 | 28.1k | JXL_RETURN_IF_ERROR(input.Done()); |
785 | 28.1k | return true; |
786 | 28.1k | }; |
787 | 27.5k | JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, |
788 | 27.5k | dec_state->shared->frame_dim.num_groups, init, |
789 | 27.5k | process_group, "ModularToRect")); |
790 | 27.5k | return true; |
791 | 27.5k | } |
792 | | |
793 | | static constexpr const float kAlmostZero = 1e-8f; |
794 | | |
795 | | Status ModularFrameDecoder::DecodeQuantTable( |
796 | | JxlMemoryManager* memory_manager, size_t required_size_x, |
797 | | size_t required_size_y, BitReader* br, QuantEncoding* encoding, size_t idx, |
798 | 339 | ModularFrameDecoder* modular_frame_decoder) { |
799 | 339 | JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->qraw.qtable_den)); |
800 | 332 | if (encoding->qraw.qtable_den < kAlmostZero) { |
801 | | // qtable[] values are already checked for <= 0 so the denominator may not |
802 | | // be negative. |
803 | 9 | return JXL_FAILURE("Invalid qtable_den: value too small"); |
804 | 9 | } |
805 | 646 | JXL_ASSIGN_OR_RETURN( |
806 | 646 | Image image, |
807 | 646 | Image::Create(memory_manager, required_size_x, required_size_y, 8, 3)); |
808 | 646 | ModularOptions options; |
809 | 646 | if (modular_frame_decoder) { |
810 | 323 | JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx)); |
811 | 323 | JXL_RETURN_IF_ERROR(ModularGenericDecompress( |
812 | 323 | br, image, /*header=*/nullptr, qt.ID(modular_frame_decoder->frame_dim), |
813 | 323 | &options, /*undo_transforms=*/true, &modular_frame_decoder->tree, |
814 | 323 | &modular_frame_decoder->code, &modular_frame_decoder->context_map)); |
815 | 323 | } else { |
816 | 0 | JXL_RETURN_IF_ERROR(ModularGenericDecompress(br, image, /*header=*/nullptr, |
817 | 0 | 0, &options, |
818 | 0 | /*undo_transforms=*/true)); |
819 | 0 | } |
820 | 266 | if (!encoding->qraw.qtable) { |
821 | 266 | encoding->qraw.qtable = |
822 | 266 | new std::vector<int>(required_size_x * required_size_y * 3); |
823 | 266 | } else { |
824 | 0 | JXL_ENSURE(encoding->qraw.qtable->size() == |
825 | 0 | required_size_x * required_size_y * 3); |
826 | 0 | } |
827 | 266 | int* qtable = encoding->qraw.qtable->data(); |
828 | 385 | for (size_t c = 0; c < 3; c++) { |
829 | 3.02k | for (size_t y = 0; y < required_size_y; y++) { |
830 | 2.90k | int32_t* JXL_RESTRICT row = image.channel[c].Row(y); |
831 | 153k | for (size_t x = 0; x < required_size_x; x++) { |
832 | 151k | qtable[c * required_size_x * required_size_y + y * required_size_x + |
833 | 151k | x] = row[x]; |
834 | 151k | if (row[x] <= 0) { |
835 | 241 | return JXL_FAILURE("Invalid raw quantization table"); |
836 | 241 | } |
837 | 151k | } |
838 | 2.90k | } |
839 | 360 | } |
840 | 25 | return true; |
841 | 266 | } |
842 | | |
843 | | } // namespace jxl |
844 | | #endif // HWY_ONCE |