Coverage Report

Created: 2025-12-03 07:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_modular.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/dec_modular.h"
7
8
#include <jxl/memory_manager.h>
9
10
#include <algorithm>
11
#include <cstddef>
12
#include <cstdint>
13
#include <cstring>
14
#include <utility>
15
#include <vector>
16
17
#include "lib/jxl/ac_strategy.h"
18
#include "lib/jxl/base/bits.h"
19
#include "lib/jxl/base/common.h"
20
#include "lib/jxl/base/data_parallel.h"
21
#include "lib/jxl/chroma_from_luma.h"
22
#include "lib/jxl/dec_ans.h"
23
#include "lib/jxl/dec_cache.h"
24
#include "lib/jxl/fields.h"
25
#include "lib/jxl/frame_dimensions.h"
26
#include "lib/jxl/frame_header.h"
27
#include "lib/jxl/image.h"
28
#include "lib/jxl/image_metadata.h"
29
#include "lib/jxl/image_ops.h"
30
#include "lib/jxl/loop_filter.h"
31
#include "lib/jxl/modular/encoding/dec_ma.h"
32
#include "lib/jxl/modular/options.h"
33
#include "lib/jxl/quant_weights.h"
34
#include "lib/jxl/quantizer.h"
35
#include "lib/jxl/render_pipeline/render_pipeline.h"
36
37
#undef HWY_TARGET_INCLUDE
38
#define HWY_TARGET_INCLUDE "lib/jxl/dec_modular.cc"
39
#include <hwy/foreach_target.h>
40
#include <hwy/highway.h>
41
42
#include "lib/jxl/base/compiler_specific.h"
43
#include "lib/jxl/base/printf_macros.h"
44
#include "lib/jxl/base/rect.h"
45
#include "lib/jxl/base/status.h"
46
#include "lib/jxl/compressed_dc.h"
47
#include "lib/jxl/epf.h"
48
#include "lib/jxl/modular/encoding/encoding.h"
49
#include "lib/jxl/modular/modular_image.h"
50
#include "lib/jxl/modular/transform/transform.h"
51
52
HWY_BEFORE_NAMESPACE();
53
namespace jxl {
54
namespace HWY_NAMESPACE {
55
56
// These templates are not found via ADL.
57
using hwy::HWY_NAMESPACE::Add;
58
using hwy::HWY_NAMESPACE::Mul;
59
using hwy::HWY_NAMESPACE::Rebind;
60
61
void MultiplySum(const size_t xsize,
62
                 const pixel_type* const JXL_RESTRICT row_in,
63
                 const pixel_type* const JXL_RESTRICT row_in_Y,
64
2.64M
                 const float factor, float* const JXL_RESTRICT row_out) {
65
2.64M
  const HWY_FULL(float) df;
66
2.64M
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
67
2.64M
  const auto factor_v = Set(df, factor);
68
80.4M
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
69
77.8M
    const auto in = Add(Load(di, row_in + x), Load(di, row_in_Y + x));
70
77.8M
    const auto out = Mul(ConvertTo(df, in), factor_v);
71
77.8M
    Store(out, df, row_out + x);
72
77.8M
  }
73
2.64M
}
jxl::N_SSE4::MultiplySum(unsigned long, int const*, int const*, float, float*)
Line
Count
Source
64
392k
                 const float factor, float* const JXL_RESTRICT row_out) {
65
392k
  const HWY_FULL(float) df;
66
392k
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
67
392k
  const auto factor_v = Set(df, factor);
68
22.6M
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
69
22.2M
    const auto in = Add(Load(di, row_in + x), Load(di, row_in_Y + x));
70
22.2M
    const auto out = Mul(ConvertTo(df, in), factor_v);
71
22.2M
    Store(out, df, row_out + x);
72
22.2M
  }
73
392k
}
jxl::N_AVX2::MultiplySum(unsigned long, int const*, int const*, float, float*)
Line
Count
Source
64
1.93M
                 const float factor, float* const JXL_RESTRICT row_out) {
65
1.93M
  const HWY_FULL(float) df;
66
1.93M
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
67
1.93M
  const auto factor_v = Set(df, factor);
68
40.0M
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
69
38.1M
    const auto in = Add(Load(di, row_in + x), Load(di, row_in_Y + x));
70
38.1M
    const auto out = Mul(ConvertTo(df, in), factor_v);
71
38.1M
    Store(out, df, row_out + x);
72
38.1M
  }
73
1.93M
}
jxl::N_SSE2::MultiplySum(unsigned long, int const*, int const*, float, float*)
Line
Count
Source
64
316k
                 const float factor, float* const JXL_RESTRICT row_out) {
65
316k
  const HWY_FULL(float) df;
66
316k
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
67
316k
  const auto factor_v = Set(df, factor);
68
17.7M
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
69
17.4M
    const auto in = Add(Load(di, row_in + x), Load(di, row_in_Y + x));
70
17.4M
    const auto out = Mul(ConvertTo(df, in), factor_v);
71
17.4M
    Store(out, df, row_out + x);
72
17.4M
  }
73
316k
}
74
75
void RgbFromSingle(const size_t xsize,
76
                   const pixel_type* const JXL_RESTRICT row_in,
77
                   const float factor, float* out_r, float* out_g,
78
56.4k
                   float* out_b) {
79
56.4k
  const HWY_FULL(float) df;
80
56.4k
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
81
82
56.4k
  const auto factor_v = Set(df, factor);
83
364k
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
84
308k
    const auto in = Load(di, row_in + x);
85
308k
    const auto out = Mul(ConvertTo(df, in), factor_v);
86
308k
    Store(out, df, out_r + x);
87
308k
    Store(out, df, out_g + x);
88
308k
    Store(out, df, out_b + x);
89
308k
  }
90
56.4k
}
jxl::N_SSE4::RgbFromSingle(unsigned long, int const*, float, float*, float*, float*)
Line
Count
Source
78
3.27k
                   float* out_b) {
79
3.27k
  const HWY_FULL(float) df;
80
3.27k
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
81
82
3.27k
  const auto factor_v = Set(df, factor);
83
21.2k
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
84
17.9k
    const auto in = Load(di, row_in + x);
85
17.9k
    const auto out = Mul(ConvertTo(df, in), factor_v);
86
17.9k
    Store(out, df, out_r + x);
87
17.9k
    Store(out, df, out_g + x);
88
17.9k
    Store(out, df, out_b + x);
89
17.9k
  }
90
3.27k
}
jxl::N_AVX2::RgbFromSingle(unsigned long, int const*, float, float*, float*, float*)
Line
Count
Source
78
48.3k
                   float* out_b) {
79
48.3k
  const HWY_FULL(float) df;
80
48.3k
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
81
82
48.3k
  const auto factor_v = Set(df, factor);
83
325k
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
84
277k
    const auto in = Load(di, row_in + x);
85
277k
    const auto out = Mul(ConvertTo(df, in), factor_v);
86
277k
    Store(out, df, out_r + x);
87
277k
    Store(out, df, out_g + x);
88
277k
    Store(out, df, out_b + x);
89
277k
  }
90
48.3k
}
jxl::N_SSE2::RgbFromSingle(unsigned long, int const*, float, float*, float*, float*)
Line
Count
Source
78
4.73k
                   float* out_b) {
79
4.73k
  const HWY_FULL(float) df;
80
4.73k
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
81
82
4.73k
  const auto factor_v = Set(df, factor);
83
17.3k
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
84
12.5k
    const auto in = Load(di, row_in + x);
85
12.5k
    const auto out = Mul(ConvertTo(df, in), factor_v);
86
12.5k
    Store(out, df, out_r + x);
87
12.5k
    Store(out, df, out_g + x);
88
12.5k
    Store(out, df, out_b + x);
89
12.5k
  }
90
4.73k
}
91
92
void SingleFromSingle(const size_t xsize,
93
                      const pixel_type* const JXL_RESTRICT row_in,
94
11.9M
                      const float factor, float* row_out) {
95
11.9M
  const HWY_FULL(float) df;
96
11.9M
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
97
98
11.9M
  const auto factor_v = Set(df, factor);
99
296M
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
100
284M
    const auto in = Load(di, row_in + x);
101
284M
    const auto out = Mul(ConvertTo(df, in), factor_v);
102
284M
    Store(out, df, row_out + x);
103
284M
  }
104
11.9M
}
jxl::N_SSE4::SingleFromSingle(unsigned long, int const*, float, float*)
Line
Count
Source
94
1.85M
                      const float factor, float* row_out) {
95
1.85M
  const HWY_FULL(float) df;
96
1.85M
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
97
98
1.85M
  const auto factor_v = Set(df, factor);
99
81.2M
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
100
79.4M
    const auto in = Load(di, row_in + x);
101
79.4M
    const auto out = Mul(ConvertTo(df, in), factor_v);
102
79.4M
    Store(out, df, row_out + x);
103
79.4M
  }
104
1.85M
}
jxl::N_AVX2::SingleFromSingle(unsigned long, int const*, float, float*)
Line
Count
Source
94
8.41M
                      const float factor, float* row_out) {
95
8.41M
  const HWY_FULL(float) df;
96
8.41M
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
97
98
8.41M
  const auto factor_v = Set(df, factor);
99
141M
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
100
132M
    const auto in = Load(di, row_in + x);
101
132M
    const auto out = Mul(ConvertTo(df, in), factor_v);
102
132M
    Store(out, df, row_out + x);
103
132M
  }
104
8.41M
}
jxl::N_SSE2::SingleFromSingle(unsigned long, int const*, float, float*)
Line
Count
Source
94
1.72M
                      const float factor, float* row_out) {
95
1.72M
  const HWY_FULL(float) df;
96
1.72M
  const Rebind<pixel_type, HWY_FULL(float)> di;  // assumes pixel_type <= float
97
98
1.72M
  const auto factor_v = Set(df, factor);
99
73.7M
  for (size_t x = 0; x < xsize; x += Lanes(di)) {
100
72.0M
    const auto in = Load(di, row_in + x);
101
72.0M
    const auto out = Mul(ConvertTo(df, in), factor_v);
102
72.0M
    Store(out, df, row_out + x);
103
72.0M
  }
104
1.72M
}
105
// NOLINTNEXTLINE(google-readability-namespace-comments)
106
}  // namespace HWY_NAMESPACE
107
}  // namespace jxl
108
HWY_AFTER_NAMESPACE();
109
110
#if HWY_ONCE
111
namespace jxl {
112
HWY_EXPORT(MultiplySum);       // Local function
113
HWY_EXPORT(RgbFromSingle);     // Local function
114
HWY_EXPORT(SingleFromSingle);  // Local function
115
116
// Slow conversion using double precision multiplication, only
117
// needed when the bit depth is too high for single precision
118
void SingleFromSingleAccurate(const size_t xsize,
119
                              const pixel_type* const JXL_RESTRICT row_in,
120
241k
                              const double factor, float* row_out) {
121
83.6M
  for (size_t x = 0; x < xsize; x++) {
122
83.3M
    row_out[x] = row_in[x] * factor;
123
83.3M
  }
124
241k
}
125
126
// convert custom [bits]-bit float (with [exp_bits] exponent bits) stored as int
127
// back to binary32 float
128
Status int_to_float(const pixel_type* const JXL_RESTRICT row_in,
129
                    float* const JXL_RESTRICT row_out, const size_t xsize,
130
519k
                    const int bits, const int exp_bits) {
131
519k
  static_assert(sizeof(pixel_type) == sizeof(float), "32-bit input is assumed");
132
519k
  if (bits == 32) {
133
129k
    JXL_ENSURE(exp_bits == 8);
134
129k
    memcpy(row_out, row_in, xsize * sizeof(float));
135
129k
    return true;
136
129k
  }
137
389k
  int exp_bias = (1 << (exp_bits - 1)) - 1;
138
389k
  int sign_shift = bits - 1;
139
389k
  int mant_bits = bits - exp_bits - 1;
140
389k
  int mant_shift = 23 - mant_bits;
141
18.7M
  for (size_t x = 0; x < xsize; ++x) {
142
18.4M
    uint32_t f;
143
18.4M
    memcpy(&f, &row_in[x], 4);
144
18.4M
    int signbit = (f >> sign_shift);
145
18.4M
    f &= (1 << sign_shift) - 1;
146
18.4M
    if (f == 0) {
147
13.7M
      row_out[x] = (signbit ? -0.f : 0.f);
148
13.7M
      continue;
149
13.7M
    }
150
4.63M
    int exp = (f >> mant_bits);
151
4.63M
    int mantissa = (f & ((1 << mant_bits) - 1));
152
4.63M
    if (exp == (1 << exp_bits) - 1) {
153
      // NaN or infinity
154
2.38M
      f = (signbit ? 0x80000000 : 0);
155
2.38M
      f |= 0b11111111 << 23;
156
2.38M
      f |= mantissa << mant_shift;
157
2.38M
      memcpy(&row_out[x], &f, 4);
158
2.38M
      continue;
159
2.38M
    }
160
2.25M
    mantissa <<= mant_shift;
161
    // Try to normalize only if there is space for maneuver.
162
2.25M
    if (exp == 0 && exp_bits < 8) {
163
      // subnormal number
164
4.73M
      while ((mantissa & 0x800000) == 0) {
165
4.16M
        mantissa <<= 1;
166
4.16M
        exp--;
167
4.16M
      }
168
563k
      exp++;
169
      // remove leading 1 because it is implicit now
170
563k
      mantissa &= 0x7fffff;
171
563k
    }
172
2.25M
    exp -= exp_bias;
173
    // broke up the arbitrary float into its parts, now reassemble into
174
    // binary32
175
2.25M
    exp += 127;
176
2.25M
    JXL_ENSURE(exp >= 0);
177
2.25M
    f = (signbit ? 0x80000000 : 0);
178
2.25M
    f |= (exp << 23);
179
2.25M
    f |= mantissa;
180
2.25M
    memcpy(&row_out[x], &f, 4);
181
2.25M
  }
182
389k
  return true;
183
389k
}
184
185
#if JXL_DEBUG_V_LEVEL >= 1
186
std::string ModularStreamId::DebugString() const {
187
  std::ostringstream os;
188
  os << (kind == Kind::GlobalData   ? "ModularGlobal"
189
         : kind == Kind::VarDCTDC   ? "VarDCTDC"
190
         : kind == Kind::ModularDC  ? "ModularDC"
191
         : kind == Kind::ACMetadata ? "ACMeta"
192
         : kind == Kind::QuantTable ? "QuantTable"
193
         : kind == Kind::ModularAC  ? "ModularAC"
194
                                    : "");
195
  if (kind == Kind::VarDCTDC || kind == Kind::ModularDC ||
196
      kind == Kind::ACMetadata || kind == Kind::ModularAC) {
197
    os << " group " << group_id;
198
  }
199
  if (kind == Kind::ModularAC) {
200
    os << " pass " << pass_id;
201
  }
202
  if (kind == Kind::QuantTable) {
203
    os << " " << quant_table_id;
204
  }
205
  return os.str();
206
}
207
#endif
208
209
Status ModularFrameDecoder::DecodeGlobalInfo(BitReader* reader,
210
                                             const FrameHeader& frame_header,
211
110k
                                             bool allow_truncated_group) {
212
110k
  JxlMemoryManager* memory_manager = this->memory_manager();
213
110k
  bool decode_color = frame_header.encoding == FrameEncoding::kModular;
214
110k
  const auto& metadata = frame_header.nonserialized_metadata->m;
215
110k
  bool is_gray = metadata.color_encoding.IsGray();
216
110k
  size_t nb_chans = 3;
217
110k
  if (is_gray && frame_header.color_transform == ColorTransform::kNone) {
218
1.99k
    nb_chans = 1;
219
1.99k
  }
220
110k
  do_color = decode_color;
221
110k
  size_t nb_extra = metadata.extra_channel_info.size();
222
110k
  bool has_tree = static_cast<bool>(reader->ReadBits(1));
223
110k
  if (!allow_truncated_group ||
224
110k
      reader->TotalBitsConsumed() < reader->TotalBytes() * kBitsPerByte) {
225
110k
    if (has_tree) {
226
76.1k
      size_t tree_size_limit =
227
76.1k
          std::min(static_cast<size_t>(1 << 22),
228
76.1k
                   1024 + frame_dim.xsize * frame_dim.ysize *
229
76.1k
                              (nb_chans + nb_extra) / 16);
230
76.1k
      JXL_RETURN_IF_ERROR(
231
76.1k
          DecodeTree(memory_manager, reader, &tree, tree_size_limit));
232
74.9k
      JXL_RETURN_IF_ERROR(DecodeHistograms(
233
74.9k
          memory_manager, reader, (tree.size() + 1) / 2, &code, &context_map));
234
74.9k
    }
235
110k
  }
236
109k
  if (!do_color) nb_chans = 0;
237
238
109k
  bool fp = metadata.bit_depth.floating_point_sample;
239
240
  // bits_per_sample is just metadata for XYB images.
241
109k
  if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
242
4.17k
      frame_header.color_transform != ColorTransform::kXYB) {
243
3.13k
    if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
244
0
      return JXL_FAILURE("uint32_t not supported in dec_modular");
245
3.13k
    } else if (metadata.bit_depth.bits_per_sample > 32) {
246
0
      return JXL_FAILURE("bits_per_sample > 32 not supported");
247
0
    }
248
3.13k
  }
249
250
219k
  JXL_ASSIGN_OR_RETURN(
251
219k
      Image gi,
252
219k
      Image::Create(memory_manager, frame_dim.xsize, frame_dim.ysize,
253
219k
                    metadata.bit_depth.bits_per_sample, nb_chans + nb_extra));
254
255
219k
  all_same_shift = true;
256
219k
  if (frame_header.color_transform == ColorTransform::kYCbCr) {
257
111k
    for (size_t c = 0; c < nb_chans; c++) {
258
78.1k
      gi.channel[c].hshift = frame_header.chroma_subsampling.HShift(c);
259
78.1k
      gi.channel[c].vshift = frame_header.chroma_subsampling.VShift(c);
260
78.1k
      size_t xsize_shifted =
261
78.1k
          DivCeil(frame_dim.xsize, 1 << gi.channel[c].hshift);
262
78.1k
      size_t ysize_shifted =
263
78.1k
          DivCeil(frame_dim.ysize, 1 << gi.channel[c].vshift);
264
78.1k
      JXL_RETURN_IF_ERROR(gi.channel[c].shrink(xsize_shifted, ysize_shifted));
265
78.1k
      if (gi.channel[c].hshift != gi.channel[0].hshift ||
266
66.3k
          gi.channel[c].vshift != gi.channel[0].vshift)
267
17.9k
        all_same_shift = false;
268
78.1k
    }
269
33.4k
  }
270
271
199k
  for (size_t ec = 0, c = nb_chans; ec < nb_extra; ec++, c++) {
272
89.5k
    size_t ecups = frame_header.extra_channel_upsampling[ec];
273
89.5k
    JXL_RETURN_IF_ERROR(
274
89.5k
        gi.channel[c].shrink(DivCeil(frame_dim.xsize_upsampled, ecups),
275
89.5k
                             DivCeil(frame_dim.ysize_upsampled, ecups)));
276
89.5k
    gi.channel[c].hshift = gi.channel[c].vshift =
277
89.5k
        CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
278
89.5k
    if (gi.channel[c].hshift != gi.channel[0].hshift ||
279
41.4k
        gi.channel[c].vshift != gi.channel[0].vshift)
280
48.3k
      all_same_shift = false;
281
89.5k
  }
282
283
109k
  JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (w/o transforms) %s",
284
109k
              gi.DebugString().c_str());
285
109k
  ModularOptions options;
286
109k
  options.max_chan_size = frame_dim.group_dim;
287
109k
  options.group_dim = frame_dim.group_dim;
288
109k
  Status dec_status = ModularGenericDecompress(
289
109k
      reader, gi, &global_header, ModularStreamId::Global().ID(frame_dim),
290
109k
      &options,
291
109k
      /*undo_transforms=*/false, &tree, &code, &context_map,
292
109k
      allow_truncated_group);
293
109k
  if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status);
294
105k
  if (dec_status.IsFatalError()) {
295
0
    return JXL_FAILURE("Failed to decode global modular info");
296
0
  }
297
298
  // TODO(eustas): are we sure this can be done after partial decode?
299
105k
  have_something = false;
300
1.08M
  for (size_t c = 0; c < gi.channel.size(); c++) {
301
984k
    Channel& gic = gi.channel[c];
302
984k
    if (c >= gi.nb_meta_channels && gic.w <= frame_dim.group_dim &&
303
944k
        gic.h <= frame_dim.group_dim)
304
938k
      have_something = true;
305
984k
  }
306
  // move global transforms to groups if possible
307
105k
  if (!have_something && all_same_shift) {
308
25.3k
    if (gi.transform.size() == 1 && gi.transform[0].id == TransformId::kRCT) {
309
416
      global_transform = gi.transform;
310
416
      gi.transform.clear();
311
      // TODO(jon): also move no-delta-palette out (trickier though)
312
416
    }
313
25.3k
  }
314
105k
  full_image = std::move(gi);
315
105k
  JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (with transforms) %s",
316
105k
              full_image.DebugString().c_str());
317
105k
  return dec_status;
318
105k
}
319
320
101k
void ModularFrameDecoder::MaybeDropFullImage() {
321
101k
  if (full_image.transform.empty() && !have_something && all_same_shift) {
322
21.1k
    use_full_image = false;
323
21.1k
    JXL_DEBUG_V(6, "Dropping full image");
324
21.1k
    for (auto& ch : full_image.channel) {
325
      // keep metadata on channels around, but dealloc their planes
326
4.23k
      ch.plane = Plane<pixel_type>();
327
4.23k
    }
328
21.1k
  }
329
101k
}
330
331
Status ModularFrameDecoder::DecodeGroup(
332
    const FrameHeader& frame_header, const Rect& rect, BitReader* reader,
333
    int minShift, int maxShift, const ModularStreamId& stream, bool zerofill,
334
    PassesDecoderState* dec_state, RenderPipelineInput* render_pipeline_input,
335
228k
    bool allow_truncated, bool* should_run_pipeline) {
336
228k
  JXL_DEBUG_V(6, "Decoding %s with rect %s and shift bracket %d..%d %s",
337
228k
              stream.DebugString().c_str(), Description(rect).c_str(), minShift,
338
228k
              maxShift, zerofill ? "using zerofill" : "");
339
228k
  JXL_ENSURE(stream.kind == ModularStreamId::Kind::ModularDC ||
340
228k
             stream.kind == ModularStreamId::Kind::ModularAC);
341
228k
  const size_t xsize = rect.xsize();
342
228k
  const size_t ysize = rect.ysize();
343
228k
  JXL_ASSIGN_OR_RETURN(Image gi, Image::Create(memory_manager_, xsize, ysize,
344
228k
                                               full_image.bitdepth, 0));
345
  // start at the first bigger-than-groupsize non-metachannel
346
228k
  size_t c = full_image.nb_meta_channels;
347
2.07M
  for (; c < full_image.channel.size(); c++) {
348
1.86M
    Channel& fc = full_image.channel[c];
349
1.86M
    if (fc.w > frame_dim.group_dim || fc.h > frame_dim.group_dim) break;
350
1.86M
  }
351
228k
  size_t beginc = c;
352
390k
  for (; c < full_image.channel.size(); c++) {
353
161k
    Channel& fc = full_image.channel[c];
354
161k
    int shift = std::min(fc.hshift, fc.vshift);
355
161k
    if (shift > maxShift) continue;
356
154k
    if (shift < minShift) continue;
357
74.0k
    Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
358
74.0k
           rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
359
74.0k
    if (r.xsize() == 0 || r.ysize() == 0) continue;
360
72.2k
    if (zerofill && use_full_image) {
361
151k
      for (size_t y = 0; y < r.ysize(); ++y) {
362
149k
        pixel_type* const JXL_RESTRICT row_out = r.Row(&fc.plane, y);
363
149k
        memset(row_out, 0, r.xsize() * sizeof(*row_out));
364
149k
      }
365
70.1k
    } else {
366
70.1k
      JXL_ASSIGN_OR_RETURN(
367
70.1k
          Channel gc, Channel::Create(memory_manager_, r.xsize(), r.ysize()));
368
70.1k
      if (zerofill) ZeroFillImage(&gc.plane);
369
70.1k
      gc.hshift = fc.hshift;
370
70.1k
      gc.vshift = fc.vshift;
371
70.1k
      gi.channel.emplace_back(std::move(gc));
372
70.1k
    }
373
72.2k
  }
374
228k
  if (zerofill && use_full_image) return true;
375
  // Return early if there's nothing to decode. Otherwise there might be
376
  // problems later (in ModularImageToDecodedRect).
377
227k
  if (gi.channel.empty()) {
378
214k
    if (dec_state && should_run_pipeline) {
379
108k
      const auto* metadata = frame_header.nonserialized_metadata;
380
108k
      if (do_color || metadata->m.num_extra_channels > 0) {
381
        // Signal to FrameDecoder that we do not have some of the required input
382
        // for the render pipeline.
383
84.4k
        *should_run_pipeline = false;
384
84.4k
      }
385
108k
    }
386
214k
    JXL_DEBUG_V(6, "Nothing to decode, returning early.");
387
214k
    return true;
388
214k
  }
389
12.7k
  ModularOptions options;
390
12.7k
  if (!zerofill) {
391
12.6k
    auto status = ModularGenericDecompress(
392
12.6k
        reader, gi, /*header=*/nullptr, stream.ID(frame_dim), &options,
393
12.6k
        /*undo_transforms=*/true, &tree, &code, &context_map, allow_truncated);
394
12.6k
    if (!allow_truncated) JXL_RETURN_IF_ERROR(status);
395
12.3k
    if (status.IsFatalError()) return status;
396
12.3k
  }
397
  // Undo global transforms that have been pushed to the group level
398
12.4k
  if (!use_full_image) {
399
8.67k
    JXL_ENSURE(render_pipeline_input);
400
8.67k
    for (const auto& t : global_transform) {
401
1.91k
      JXL_RETURN_IF_ERROR(t.Inverse(gi, global_header.wp_header));
402
1.91k
    }
403
8.67k
    JXL_RETURN_IF_ERROR(ModularImageToDecodedRect(
404
8.67k
        frame_header, gi, dec_state, nullptr, *render_pipeline_input,
405
8.67k
        Rect(0, 0, gi.w, gi.h)));
406
8.67k
    return true;
407
8.67k
  }
408
3.80k
  int gic = 0;
409
80.5k
  for (c = beginc; c < full_image.channel.size(); c++) {
410
76.7k
    Channel& fc = full_image.channel[c];
411
76.7k
    int shift = std::min(fc.hshift, fc.vshift);
412
76.7k
    if (shift > maxShift) continue;
413
73.9k
    if (shift < minShift) continue;
414
35.9k
    Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
415
35.9k
           rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
416
36.0k
    if (r.xsize() == 0 || r.ysize() == 0) continue;
417
34.8k
    JXL_ENSURE(use_full_image);
418
34.8k
    JXL_RETURN_IF_ERROR(
419
34.8k
        CopyImageTo(/*rect_from=*/Rect(0, 0, r.xsize(), r.ysize()),
420
34.8k
                    /*from=*/gi.channel[gic].plane,
421
34.8k
                    /*rect_to=*/r, /*to=*/&fc.plane));
422
34.8k
    gic++;
423
34.8k
  }
424
3.80k
  return true;
425
3.80k
}
426
427
Status ModularFrameDecoder::DecodeVarDCTDC(const FrameHeader& frame_header,
428
                                           size_t group_id, BitReader* reader,
429
25.2k
                                           PassesDecoderState* dec_state) {
430
25.2k
  JxlMemoryManager* memory_manager = dec_state->memory_manager();
431
25.2k
  const Rect r = dec_state->shared->frame_dim.DCGroupRect(group_id);
432
25.2k
  JXL_DEBUG_V(6, "Decoding VarDCT DC with rect %s", Description(r).c_str());
433
  // TODO(eustas): investigate if we could reduce the impact of
434
  //               EvalRationalPolynomial; generally speaking, the limit is
435
  //               2**(128/(3*magic)), where 128 comes from IEEE 754 exponent,
436
  //               3 comes from XybToRgb that cubes the values, and "magic" is
437
  //               the sum of all other contributions. 2**18 is known to lead
438
  //               to NaN on input found by fuzzing (see commit message).
439
25.2k
  JXL_ASSIGN_OR_RETURN(Image image,
440
25.2k
                       Image::Create(memory_manager, r.xsize(), r.ysize(),
441
25.2k
                                     full_image.bitdepth, 3));
442
25.2k
  size_t stream_id = ModularStreamId::VarDCTDC(group_id).ID(frame_dim);
443
25.2k
  reader->Refill();
444
25.2k
  size_t extra_precision = reader->ReadFixedBits<2>();
445
25.2k
  float mul = 1.0f / (1 << extra_precision);
446
25.2k
  ModularOptions options;
447
100k
  for (size_t c = 0; c < 3; c++) {
448
75.6k
    Channel& ch = image.channel[c < 2 ? c ^ 1 : c];
449
75.6k
    ch.w >>= frame_header.chroma_subsampling.HShift(c);
450
75.6k
    ch.h >>= frame_header.chroma_subsampling.VShift(c);
451
75.6k
    JXL_RETURN_IF_ERROR(ch.shrink());
452
75.6k
  }
453
25.2k
  if (!ModularGenericDecompress(
454
25.2k
          reader, image, /*header=*/nullptr, stream_id, &options,
455
25.2k
          /*undo_transforms=*/true, &tree, &code, &context_map)) {
456
2.17k
    return JXL_FAILURE("Failed to decode VarDCT DC group (DC group id %d)",
457
2.17k
                       static_cast<int>(group_id));
458
2.17k
  }
459
23.0k
  DequantDC(r, &dec_state->shared_storage.dc_storage,
460
23.0k
            &dec_state->shared_storage.quant_dc, image,
461
23.0k
            dec_state->shared->quantizer.MulDC(), mul,
462
23.0k
            dec_state->shared->cmap.base().DCFactors(),
463
23.0k
            frame_header.chroma_subsampling, dec_state->shared->block_ctx_map);
464
23.0k
  return true;
465
25.2k
}
466
467
Status ModularFrameDecoder::DecodeAcMetadata(const FrameHeader& frame_header,
468
                                             size_t group_id, BitReader* reader,
469
23.0k
                                             PassesDecoderState* dec_state) {
470
23.0k
  JxlMemoryManager* memory_manager = dec_state->memory_manager();
471
23.0k
  const Rect r = dec_state->shared->frame_dim.DCGroupRect(group_id);
472
23.0k
  JXL_DEBUG_V(6, "Decoding AcMetadata with rect %s", Description(r).c_str());
473
23.0k
  size_t upper_bound = r.xsize() * r.ysize();
474
23.0k
  reader->Refill();
475
23.0k
  size_t count = reader->ReadBits(CeilLog2Nonzero(upper_bound)) + 1;
476
23.0k
  size_t stream_id = ModularStreamId::ACMetadata(group_id).ID(frame_dim);
477
  // YToX, YToB, ACS + QF, EPF
478
23.0k
  JXL_ASSIGN_OR_RETURN(Image image,
479
23.0k
                       Image::Create(memory_manager, r.xsize(), r.ysize(),
480
23.0k
                                     full_image.bitdepth, 4));
481
23.0k
  static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
482
23.0k
  Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
483
23.0k
  JXL_ASSIGN_OR_RETURN(
484
23.0k
      image.channel[0],
485
23.0k
      Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3));
486
23.0k
  JXL_ASSIGN_OR_RETURN(
487
23.0k
      image.channel[1],
488
23.0k
      Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3));
489
23.0k
  JXL_ASSIGN_OR_RETURN(image.channel[2],
490
23.0k
                       Channel::Create(memory_manager, count, 2, 0, 0));
491
23.0k
  ModularOptions options;
492
23.0k
  if (!ModularGenericDecompress(
493
23.0k
          reader, image, /*header=*/nullptr, stream_id, &options,
494
23.0k
          /*undo_transforms=*/true, &tree, &code, &context_map)) {
495
1.60k
    return JXL_FAILURE("Failed to decode AC metadata");
496
1.60k
  }
497
21.4k
  JXL_RETURN_IF_ERROR(
498
21.4k
      ConvertPlaneAndClamp(Rect(image.channel[0].plane), image.channel[0].plane,
499
21.4k
                           cr, &dec_state->shared_storage.cmap.ytox_map));
500
21.4k
  JXL_RETURN_IF_ERROR(
501
21.4k
      ConvertPlaneAndClamp(Rect(image.channel[1].plane), image.channel[1].plane,
502
21.4k
                           cr, &dec_state->shared_storage.cmap.ytob_map));
503
21.4k
  size_t num = 0;
504
21.4k
  bool is444 = frame_header.chroma_subsampling.Is444();
505
21.4k
  auto& ac_strategy = dec_state->shared_storage.ac_strategy;
506
21.4k
  size_t xlim = std::min(ac_strategy.xsize(), r.x0() + r.xsize());
507
21.4k
  size_t ylim = std::min(ac_strategy.ysize(), r.y0() + r.ysize());
508
21.4k
  uint32_t local_used_acs = 0;
509
298k
  for (size_t iy = 0; iy < r.ysize(); iy++) {
510
277k
    size_t y = r.y0() + iy;
511
277k
    int32_t* row_qf = r.Row(&dec_state->shared_storage.raw_quant_field, iy);
512
277k
    uint8_t* row_epf = r.Row(&dec_state->shared_storage.epf_sharpness, iy);
513
277k
    int32_t* row_in_1 = image.channel[2].plane.Row(0);
514
277k
    int32_t* row_in_2 = image.channel[2].plane.Row(1);
515
277k
    int32_t* row_in_3 = image.channel[3].plane.Row(iy);
516
4.62M
    for (size_t ix = 0; ix < r.xsize(); ix++) {
517
4.34M
      size_t x = r.x0() + ix;
518
4.34M
      int sharpness = row_in_3[ix];
519
4.34M
      if (sharpness < 0 || sharpness >= LoopFilter::kEpfSharpEntries) {
520
120
        return JXL_FAILURE("Corrupted sharpness field");
521
120
      }
522
4.34M
      row_epf[ix] = sharpness;
523
4.34M
      if (ac_strategy.IsValid(x, y)) {
524
803k
        continue;
525
803k
      }
526
527
3.54M
      if (num >= count) return JXL_FAILURE("Corrupted stream");
528
529
3.54M
      if (!AcStrategy::IsRawStrategyValid(row_in_1[num])) {
530
83
        return JXL_FAILURE("Invalid AC strategy");
531
83
      }
532
3.54M
      local_used_acs |= 1u << row_in_1[num];
533
3.54M
      AcStrategy acs = AcStrategy::FromRawStrategy(row_in_1[num]);
534
3.54M
      if ((acs.covered_blocks_x() > 1 || acs.covered_blocks_y() > 1) &&
535
330k
          !is444) {
536
3
        return JXL_FAILURE(
537
3
            "AC strategy not compatible with chroma subsampling");
538
3
      }
539
      // Ensure that blocks do not overflow *AC* groups.
540
3.54M
      size_t next_x_ac_block = (x / kGroupDimInBlocks + 1) * kGroupDimInBlocks;
541
3.54M
      size_t next_y_ac_block = (y / kGroupDimInBlocks + 1) * kGroupDimInBlocks;
542
3.54M
      size_t next_x_dct_block = x + acs.covered_blocks_x();
543
3.54M
      size_t next_y_dct_block = y + acs.covered_blocks_y();
544
3.54M
      if (next_x_dct_block > next_x_ac_block || next_x_dct_block > xlim) {
545
10
        return JXL_FAILURE("Invalid AC strategy, x overflow");
546
10
      }
547
3.54M
      if (next_y_dct_block > next_y_ac_block || next_y_dct_block > ylim) {
548
11
        return JXL_FAILURE("Invalid AC strategy, y overflow");
549
11
      }
550
3.54M
      JXL_RETURN_IF_ERROR(
551
3.54M
          ac_strategy.SetNoBoundsCheck(x, y, AcStrategyType(row_in_1[num])));
552
3.54M
      row_qf[ix] = 1 + std::max<int32_t>(0, std::min(Quantizer::kQuantMax - 1,
553
3.54M
                                                     row_in_2[num]));
554
3.54M
      num++;
555
3.54M
    }
556
277k
  }
557
21.1k
  dec_state->used_acs |= local_used_acs;
558
21.1k
  if (frame_header.loop_filter.epf_iters > 0) {
559
9.11k
    JXL_RETURN_IF_ERROR(ComputeSigma(frame_header.loop_filter, r, dec_state));
560
9.11k
  }
561
21.1k
  return true;
562
21.1k
}
563
564
// Converts the integer samples of `gi` covered by `modular_rect` into the
// float buffers exposed by `render_pipeline_input`.
//
// `gi` must have no pending transforms. When `do_color` is set, the three
// color buffers (indices 0..2) are filled first; afterwards one buffer per
// extra channel listed in the metadata is filled (indices 3 + ec). Each source
// channel's rect is derived from `modular_rect` using that channel's
// hshift/vshift and must match the size of the destination buffer rect,
// otherwise a failure is returned. `pool` is only used to distribute the rows
// of the color channels; extra channels are converted serially.
Status ModularFrameDecoder::ModularImageToDecodedRect(
    const FrameHeader& frame_header, Image& gi, PassesDecoderState* dec_state,
    jxl::ThreadPool* pool, RenderPipelineInput& render_pipeline_input,
    Rect modular_rect) const {
  JXL_ENSURE(gi.transform.empty());
  const auto* metadata = frame_header.nonserialized_metadata;
  const bool is_xyb = frame_header.color_transform == ColorTransform::kXYB;
  // Samples narrower than 23 bits go through the HWY-dispatched kernels;
  // anything wider uses SingleFromSingleAccurate.
  const bool narrow_samples = full_image.bitdepth < 23;

  // Row `y` of render-pipeline buffer `index`, offset by that buffer's rect.
  const auto output_row = [&](size_t index, size_t y) {
    const auto& target = render_pipeline_input.GetBuffer(index);
    return target.second.Row(target.first, y);
  };

  // Rect of `channel` that corresponds to `modular_rect` once the channel's
  // shifts are applied, cropped to the channel's plane.
  const auto channel_rect = [&](const Channel& channel) {
    Rect shifted(modular_rect.x0() >> channel.hshift,
                 modular_rect.y0() >> channel.vshift,
                 DivCeil(modular_rect.xsize(), 1 << channel.hshift),
                 DivCeil(modular_rect.ysize(), 1 << channel.vshift));
    return shifted.Crop(channel.plane);
  };

  // Scales `count` integer samples of `in` by `scale` into `out`.
  const auto scale_row = [&](size_t count, const pixel_type* in, double scale,
                             float* out) {
    if (narrow_samples) {
      HWY_DYNAMIC_DISPATCH(SingleFromSingle)
      (count, in, scale, out);
    } else {
      SingleFromSingleAccurate(count, in, scale, out);
    }
  };

  size_t c = 0;
  if (do_color) {
    const bool rgb_from_gray =
        metadata->m.color_encoding.IsGray() &&
        frame_header.color_transform == ColorTransform::kNone;
    const bool fp = metadata->m.bit_depth.floating_point_sample && !is_xyb;
    for (; c < 3; c++) {
      double scale = 0;
      if (full_image.bitdepth < 32) {
        scale = 1.0 / ((1u << full_image.bitdepth) - 1);
      }
      size_t c_in = c;
      if (is_xyb) {
        scale = dec_state->shared->matrices.DCQuants()[c];
        // XYB is encoded as YX(B-Y)
        if (c < 2) c_in = 1 - c;
      } else if (rgb_from_gray) {
        c_in = 0;
      }
      JXL_ENSURE(c_in < gi.channel.size());
      Channel& ch_in = gi.channel[c_in];
      // TODO(eustas): could we detect it on earlier stage?
      if (ch_in.w == 0 || ch_in.h == 0) {
        return JXL_FAILURE("Empty image");
      }
      JXL_ENSURE(ch_in.hshift <= 3 && ch_in.vshift <= 3);
      Rect r = render_pipeline_input.GetBuffer(c).second;
      Rect src_rect = channel_rect(ch_in);
      size_t xsize_shifted = r.xsize();
      size_t ysize_shifted = r.ysize();
      const bool sizes_match =
          r.ysize() == src_rect.ysize() && r.xsize() == src_rect.xsize();
      if (!sizes_match) {
        return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS
                           "x%" PRIuS
                           " modular channel into "
                           "a %" PRIuS "x%" PRIuS " rect",
                           src_rect.xsize(), src_rect.ysize(), r.xsize(),
                           r.ysize());
      }
      if (is_xyb && c == 2) {
        // Third XYB channel: combined with channel 0 by MultiplySum.
        JXL_ENSURE(!fp);
        const auto process_row = [&](const uint32_t task,
                                     size_t /* thread */) -> Status {
          const size_t y = task;
          const pixel_type* const JXL_RESTRICT row_in =
              src_rect.Row(&ch_in.plane, y);
          const pixel_type* const JXL_RESTRICT row_y =
              src_rect.Row(&gi.channel[0].plane, y);
          float* const JXL_RESTRICT row_out = output_row(c, y);
          HWY_DYNAMIC_DISPATCH(MultiplySum)
          (xsize_shifted, row_in, row_y, scale, row_out);
          return true;
        };
        JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted,
                                      ThreadPool::NoInit, process_row,
                                      "ModularIntToFloat"));
      } else if (fp) {
        int bits = metadata->m.bit_depth.bits_per_sample;
        int exp_bits = metadata->m.bit_depth.exponent_bits_per_sample;
        const auto process_row = [&](const uint32_t task,
                                     size_t /* thread */) -> Status {
          const size_t y = task;
          const pixel_type* const JXL_RESTRICT row_in =
              src_rect.Row(&ch_in.plane, y);
          // Gray input feeds all three color buffers; otherwise only `c`.
          const size_t first = rgb_from_gray ? 0 : c;
          const size_t last = rgb_from_gray ? 3 : c + 1;
          for (size_t cc = first; cc < last; cc++) {
            float* const JXL_RESTRICT row_out = output_row(cc, y);
            JXL_RETURN_IF_ERROR(
                int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits));
          }
          return true;
        };
        JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted,
                                      ThreadPool::NoInit, process_row,
                                      "ModularIntToFloat_losslessfloat"));
      } else {
        const auto process_row = [&](const uint32_t task,
                                     size_t /* thread */) -> Status {
          const size_t y = task;
          const pixel_type* const JXL_RESTRICT row_in =
              src_rect.Row(&ch_in.plane, y);
          if (!rgb_from_gray) {
            float* const JXL_RESTRICT row_out = output_row(c, y);
            scale_row(xsize_shifted, row_in, scale, row_out);
          } else if (narrow_samples) {
            HWY_DYNAMIC_DISPATCH(RgbFromSingle)
            (xsize_shifted, row_in, scale, output_row(0, y), output_row(1, y),
             output_row(2, y));
          } else {
            for (size_t cc = 0; cc < 3; cc++) {
              SingleFromSingleAccurate(xsize_shifted, row_in, scale,
                                       output_row(cc, y));
            }
          }
          return true;
        };
        JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted,
                                      ThreadPool::NoInit, process_row,
                                      "ModularIntToFloat"));
      }
      if (rgb_from_gray) {
        // The single gray channel already filled all three color buffers.
        break;
      }
    }
    if (rgb_from_gray) {
      // Only one modular channel was consumed for color.
      c = 1;
    }
  }

  // Extra channels: modular channel `c` maps to pipeline buffer `3 + ec`.
  const size_t num_extra_channels = metadata->m.num_extra_channels;
  for (size_t ec = 0; ec < num_extra_channels; ec++, c++) {
    const ExtraChannelInfo& eci = metadata->m.extra_channel_info[ec];
    int bits = eci.bit_depth.bits_per_sample;
    int exp_bits = eci.bit_depth.exponent_bits_per_sample;
    bool fp = eci.bit_depth.floating_point_sample;
    JXL_ENSURE(fp || bits < 32);
    const double scale = fp ? 0 : (1.0 / ((1u << bits) - 1));
    JXL_ENSURE(c < gi.channel.size());
    Channel& ch_in = gi.channel[c];
    const auto& buffer = render_pipeline_input.GetBuffer(3 + ec);
    Rect r = buffer.second;
    Rect src_rect = channel_rect(ch_in);
    const bool sizes_match =
        r.ysize() == src_rect.ysize() && r.xsize() == src_rect.xsize();
    if (!sizes_match) {
      return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS
                         "x%" PRIuS
                         " modular channel into "
                         "a %" PRIuS "x%" PRIuS " rect",
                         src_rect.xsize(), src_rect.ysize(), r.xsize(),
                         r.ysize());
    }
    for (size_t y = 0; y < r.ysize(); ++y) {
      float* const JXL_RESTRICT row_out = r.Row(buffer.first, y);
      const pixel_type* const JXL_RESTRICT row_in =
          src_rect.Row(&ch_in.plane, y);
      if (fp) {
        JXL_RETURN_IF_ERROR(
            int_to_float(row_in, row_out, r.xsize(), bits, exp_bits));
        continue;
      }
      scale_row(r.xsize(), row_in, scale, row_out);
    }
  }
  return true;
}
739
740
// Undoes the global transforms on the full modular image and pushes every
// group of the result through the render pipeline.
//
// Returns immediately with success when `use_full_image` is not set. When
// `inplace` is true the stored `full_image` is moved from; otherwise a clone
// is processed and `full_image` is left as is. `pool` is ignored (treated as
// nullptr) when the image area is smaller than one group.
Status ModularFrameDecoder::FinalizeDecoding(const FrameHeader& frame_header,
                                             PassesDecoderState* dec_state,
                                             jxl::ThreadPool* pool,
                                             bool inplace) {
  if (!use_full_image) {
    return true;
  }

  Image gi{dec_state->memory_manager()};
  if (!inplace) {
    JXL_ASSIGN_OR_RETURN(gi, Image::Clone(full_image));
  } else {
    gi = std::move(full_image);
  }

  JXL_DEBUG_V(3, "Finalizing decoding for modular image: %s",
              gi.DebugString().c_str());

  // Don't use threads if total image size is smaller than a group
  const size_t width = gi.w;
  const size_t height = gi.h;
  const size_t group_area = frame_dim.group_dim * frame_dim.group_dim;
  if (width * height < group_area) {
    pool = nullptr;
  }

  // Undo the global transforms
  gi.undo_transforms(global_header.wp_header, pool);
  JXL_ENSURE(global_transform.empty());
  if (gi.error) {
    return JXL_FAILURE("Undoing transforms failed");
  }

  // Reset the "done" state of every group before feeding the pipeline.
  const size_t group_count = dec_state->shared->frame_dim.num_groups;
  for (size_t group = 0; group < group_count; ++group) {
    dec_state->render_pipeline->ClearDone(group);
  }

  const auto init = [&](size_t num_threads) -> Status {
    const bool is_var_dct = frame_header.encoding == FrameEncoding::kVarDCT;
    const bool has_noise = (frame_header.flags & FrameHeader::kNoise) != 0;
    bool use_group_ids = is_var_dct || has_noise;
    JXL_RETURN_IF_ERROR(dec_state->render_pipeline->PrepareForThreads(
        num_threads, use_group_ids));
    return true;
  };

  // Converts one group of `gi` into its pipeline input buffers and marks the
  // input as done. No nested pool is passed to the per-group conversion.
  const auto process_group = [&](const uint32_t group,
                                 size_t thread_id) -> Status {
    RenderPipelineInput input =
        dec_state->render_pipeline->GetInputBuffers(group, thread_id);
    JXL_RETURN_IF_ERROR(ModularImageToDecodedRect(
        frame_header, gi, dec_state, nullptr, input,
        dec_state->shared->frame_dim.GroupRect(group)));
    JXL_RETURN_IF_ERROR(input.Done());
    return true;
  };

  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0,
                                dec_state->shared->frame_dim.num_groups, init,
                                process_group, "ModularToRect"));
  return true;
}
792
793
// Lower bound for the raw quant table denominator: DecodeQuantTable rejects
// any `qtable_den` that is smaller than this value.
static constexpr const float kAlmostZero = 1e-8f;
794
795
// Reads a raw quantization table from `br` into `encoding->qraw`.
//
// The stream holds an F16 denominator followed by a 3-channel modular image
// of `required_size_x` x `required_size_y` samples. The samples are copied
// channel by channel, row by row, into `encoding->qraw.qtable`, which is
// allocated here if it is still null (otherwise its size must already match).
// When `modular_frame_decoder` is non-null its tree, code and context map are
// used for decoding and the stream id is derived from `idx`; otherwise stream
// id 0 is used.
//
// Fails if the denominator is below kAlmostZero, if decoding fails, or if any
// table entry is <= 0.
Status ModularFrameDecoder::DecodeQuantTable(
    JxlMemoryManager* memory_manager, size_t required_size_x,
    size_t required_size_y, BitReader* br, QuantEncoding* encoding, size_t idx,
    ModularFrameDecoder* modular_frame_decoder) {
  JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->qraw.qtable_den));
  const bool den_too_small = encoding->qraw.qtable_den < kAlmostZero;
  if (den_too_small) {
    // qtable[] values are already checked for <= 0 so the denominator may not
    // be negative.
    return JXL_FAILURE("Invalid qtable_den: value too small");
  }

  JXL_ASSIGN_OR_RETURN(
      Image image,
      Image::Create(memory_manager, required_size_x, required_size_y, 8, 3));
  ModularOptions options;
  if (modular_frame_decoder == nullptr) {
    JXL_RETURN_IF_ERROR(ModularGenericDecompress(br, image, /*header=*/nullptr,
                                                 0, &options,
                                                 /*undo_transforms=*/true));
  } else {
    JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx));
    JXL_RETURN_IF_ERROR(ModularGenericDecompress(
        br, image, /*header=*/nullptr, qt.ID(modular_frame_decoder->frame_dim),
        &options, /*undo_transforms=*/true, &modular_frame_decoder->tree,
        &modular_frame_decoder->code, &modular_frame_decoder->context_map));
  }

  if (encoding->qraw.qtable != nullptr) {
    JXL_ENSURE(encoding->qraw.qtable->size() ==
               required_size_x * required_size_y * 3);
  } else {
    encoding->qraw.qtable =
        new std::vector<int>(required_size_x * required_size_y * 3);
  }

  // The destination layout is channel-major then row-major, so a single
  // running cursor visits exactly the indices
  // c * size_x * size_y + y * size_x + x in order.
  int* cursor = encoding->qraw.qtable->data();
  for (size_t c = 0; c < 3; c++) {
    for (size_t y = 0; y < required_size_y; y++) {
      const int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
      for (size_t x = 0; x < required_size_x; x++) {
        const int32_t value = row[x];
        // The entry is stored before it is validated.
        *cursor = value;
        ++cursor;
        if (value <= 0) {
          return JXL_FAILURE("Invalid raw quantization table");
        }
      }
    }
  }
  return true;
}
842
843
}  // namespace jxl
844
#endif  // HWY_ONCE