Coverage Report

Created: 2025-08-12 07:37

/src/libjxl/lib/jxl/enc_modular.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular.h"
7
8
#include <jxl/cms_interface.h>
9
#include <jxl/memory_manager.h>
10
#include <jxl/types.h>
11
12
#include <algorithm>
13
#include <array>
14
#include <cmath>
15
#include <cstddef>
16
#include <cstdint>
17
#include <cstdlib>
18
#include <cstring>
19
#include <limits>
20
#include <memory>
21
#include <tuple>
22
#include <utility>
23
#include <vector>
24
25
#include "lib/jxl/ac_strategy.h"
26
#include "lib/jxl/base/bits.h"
27
#include "lib/jxl/base/common.h"
28
#include "lib/jxl/base/compiler_specific.h"
29
#include "lib/jxl/base/data_parallel.h"
30
#include "lib/jxl/base/printf_macros.h"
31
#include "lib/jxl/base/rect.h"
32
#include "lib/jxl/base/status.h"
33
#include "lib/jxl/chroma_from_luma.h"
34
#include "lib/jxl/common.h"
35
#include "lib/jxl/compressed_dc.h"
36
#include "lib/jxl/dec_ans.h"
37
#include "lib/jxl/dec_modular.h"
38
#include "lib/jxl/enc_ans.h"
39
#include "lib/jxl/enc_ans_params.h"
40
#include "lib/jxl/enc_aux_out.h"
41
#include "lib/jxl/enc_bit_writer.h"
42
#include "lib/jxl/enc_cache.h"
43
#include "lib/jxl/enc_fields.h"
44
#include "lib/jxl/enc_gaborish.h"
45
#include "lib/jxl/enc_modular_simd.h"
46
#include "lib/jxl/enc_params.h"
47
#include "lib/jxl/enc_patch_dictionary.h"
48
#include "lib/jxl/enc_quant_weights.h"
49
#include "lib/jxl/fields.h"
50
#include "lib/jxl/frame_dimensions.h"
51
#include "lib/jxl/frame_header.h"
52
#include "lib/jxl/image.h"
53
#include "lib/jxl/image_metadata.h"
54
#include "lib/jxl/image_ops.h"
55
#include "lib/jxl/memory_manager_internal.h"
56
#include "lib/jxl/modular/encoding/context_predict.h"
57
#include "lib/jxl/modular/encoding/dec_ma.h"
58
#include "lib/jxl/modular/encoding/enc_encoding.h"
59
#include "lib/jxl/modular/encoding/enc_ma.h"
60
#include "lib/jxl/modular/encoding/encoding.h"
61
#include "lib/jxl/modular/encoding/ma_common.h"
62
#include "lib/jxl/modular/modular_image.h"
63
#include "lib/jxl/modular/options.h"
64
#include "lib/jxl/modular/transform/enc_rct.h"
65
#include "lib/jxl/modular/transform/enc_transform.h"
66
#include "lib/jxl/modular/transform/squeeze.h"
67
#include "lib/jxl/modular/transform/squeeze_params.h"
68
#include "lib/jxl/modular/transform/transform.h"
69
#include "lib/jxl/pack_signed.h"
70
#include "lib/jxl/passes_state.h"
71
#include "lib/jxl/quant_weights.h"
72
#include "modular/options.h"
73
74
namespace jxl {
75
76
namespace {
77
// constexpr bool kPrintTree = false;
78
79
// Squeeze default quantization factors
80
// these quantization factors are for -Q 50  (other qualities simply scale the
81
// factors; things are rounded down and obviously cannot get below 1)
82
const float squeeze_quality_factor =
83
    0.35;  // for easy tweaking of the quality range (decrease this number for
84
           // higher quality)
85
const float squeeze_luma_factor =
86
    1.1;  // for easy tweaking of the balance between luma (or anything
87
          // non-chroma) and chroma (decrease this number for higher quality
88
          // luma)
89
const float squeeze_quality_factor_xyb = 4.8f;
90
const float squeeze_xyb_qtable[3][16] = {
91
    {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16,
92
     0.08, 0.04, 0.02, 0.01, 0.005},  // Y
93
    {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5,
94
     0.5},  // X
95
    {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5,
96
     0.5},  // B-Y
97
};
98
99
const float squeeze_luma_qtable[16] = {163.84, 81.92, 40.96, 20.48, 10.24, 5.12,
100
                                       2.56,   1.28,  0.64,  0.32,  0.16,  0.08,
101
                                       0.04,   0.02,  0.01,  0.005};
102
// for 8-bit input, the range of YCoCg chroma is -255..255 so basically this
103
// does 4:2:0 subsampling (two most fine grained layers get quantized away)
104
const float squeeze_chroma_qtable[16] = {
105
    1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
106
107
// Merges the trees in `trees` using nodes that decide on stream_id, as defined
108
// by `tree_splits`.
109
Status MergeTrees(const std::vector<Tree>& trees,
110
                  const std::vector<size_t>& tree_splits, size_t begin,
111
572
                  size_t end, Tree* tree) {
112
572
  JXL_ENSURE(trees.size() + 1 == tree_splits.size());
113
572
  JXL_ENSURE(end > begin);
114
572
  JXL_ENSURE(end <= trees.size());
115
572
  if (end == begin + 1) {
116
    // Insert the tree, adding the opportune offset to all child nodes.
117
    // This will make the leaf IDs wrong, but subsequent roundtripping will fix
118
    // them.
119
398
    size_t sz = tree->size();
120
398
    tree->insert(tree->end(), trees[begin].begin(), trees[begin].end());
121
12.2k
    for (size_t i = sz; i < tree->size(); i++) {
122
11.8k
      (*tree)[i].lchild += sz;
123
11.8k
      (*tree)[i].rchild += sz;
124
11.8k
    }
125
398
    return true;
126
398
  }
127
174
  size_t mid = (begin + end) / 2;
128
174
  size_t splitval = tree_splits[mid] - 1;
129
174
  size_t cur = tree->size();
130
174
  tree->emplace_back(1 /*stream_id*/, splitval, 0, 0, Predictor::Zero, 0, 1);
131
174
  (*tree)[cur].lchild = tree->size();
132
174
  JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, mid, end, tree));
133
174
  (*tree)[cur].rchild = tree->size();
134
174
  JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, begin, mid, tree));
135
174
  return true;
136
174
}
137
138
186
void QuantizeChannel(Channel& ch, const int q) {
139
186
  if (q == 1) return;
140
0
  for (size_t y = 0; y < ch.plane.ysize(); y++) {
141
0
    pixel_type* row = ch.plane.Row(y);
142
0
    for (size_t x = 0; x < ch.plane.xsize(); x++) {
143
0
      if (row[x] < 0) {
144
0
        row[x] = -((-row[x] + q / 2) / q) * q;
145
0
      } else {
146
0
        row[x] = ((row[x] + q / 2) / q) * q;
147
0
      }
148
0
    }
149
0
  }
150
0
}
151
152
// convert binary32 float that corresponds to custom [bits]-bit float (with
153
// [exp_bits] exponent bits) to a [bits]-bit integer representation that should
154
// fit in pixel_type
155
Status float_to_int(const float* const row_in, pixel_type* const row_out,
156
                    size_t xsize, unsigned int bits, unsigned int exp_bits,
157
8.69k
                    bool fp, double dfactor) {
158
8.69k
  JXL_ENSURE(sizeof(pixel_type) * 8 >= bits);
159
8.69k
  if (!fp) {
160
8.44k
    if (bits > 22) {
161
0
      for (size_t x = 0; x < xsize; ++x) {
162
0
        row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5);
163
0
      }
164
8.44k
    } else {
165
8.44k
      float factor = dfactor;
166
717k
      for (size_t x = 0; x < xsize; ++x) {
167
709k
        row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f);
168
709k
      }
169
8.44k
    }
170
8.44k
    return true;
171
8.44k
  }
172
257
  if (bits == 32 && fp) {
173
257
    JXL_ENSURE(exp_bits == 8);
174
257
    memcpy(static_cast<void*>(row_out), static_cast<const void*>(row_in),
175
257
           4 * xsize);
176
257
    return true;
177
257
  }
178
179
0
  JXL_ENSURE(bits > 0);
180
0
  int exp_bias = (1 << (exp_bits - 1)) - 1;
181
0
  int max_exp = (1 << exp_bits) - 1;
182
0
  uint32_t sign = (1u << (bits - 1));
183
0
  int mant_bits = bits - exp_bits - 1;
184
0
  int mant_shift = 23 - mant_bits;
185
0
  for (size_t x = 0; x < xsize; ++x) {
186
0
    uint32_t f;
187
0
    memcpy(&f, &row_in[x], 4);
188
0
    int signbit = (f >> 31);
189
0
    f &= 0x7fffffff;
190
0
    if (f == 0) {
191
0
      row_out[x] = (signbit ? sign : 0);
192
0
      continue;
193
0
    }
194
0
    int exp = (f >> 23) - 127;
195
0
    if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed");
196
0
    int mantissa = (f & 0x007fffff);
197
    // broke up the binary32 into its parts, now reassemble into
198
    // arbitrary float
199
0
    exp += exp_bias;
200
0
    if (exp < 0) {  // will become a subnormal number
201
      // add implicit leading 1 to mantissa
202
0
      mantissa |= 0x00800000;
203
0
      if (exp < -mant_bits) {
204
0
        return JXL_FAILURE(
205
0
            "Invalid float number: %g cannot be represented with %i "
206
0
            "exp_bits and %i mant_bits (exp %i)",
207
0
            row_in[x], exp_bits, mant_bits, exp);
208
0
      }
209
0
      mantissa >>= 1 - exp;
210
0
      exp = 0;
211
0
    }
212
    // exp should be representable in exp_bits, otherwise input was
213
    // invalid
214
0
    if (exp > max_exp) return JXL_FAILURE("Invalid float exponent");
215
0
    if (mantissa & ((1 << mant_shift) - 1)) {
216
0
      return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x],
217
0
                         mantissa);
218
0
    }
219
0
    mantissa >>= mant_shift;
220
0
    f = (signbit ? sign : 0);
221
0
    f |= (exp << mant_bits);
222
0
    f |= mantissa;
223
0
    row_out[x] = static_cast<pixel_type>(f);
224
0
  }
225
0
  return true;
226
0
}
227
228
0
float EstimateWPCost(const Image& img, size_t i) {
229
0
  size_t extra_bits = 0;
230
0
  float histo_cost = 0;
231
0
  HybridUintConfig config;
232
0
  int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31,
233
0
                       -23,  -15,  -11,  -7,   -4,   -3,  -1,  0,   1,
234
0
                       3,    5,    7,    11,   15,   23,  31,  47,  63,
235
0
                       95,   127,  191,  255,  392,  500};
236
0
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
237
0
  Histogram histo[nc] = {};
238
0
  weighted::Header wp_header;
239
0
  PredictorMode(i, &wp_header);
240
0
  for (const Channel& ch : img.channel) {
241
0
    const intptr_t onerow = ch.plane.PixelsPerRow();
242
0
    weighted::State wp_state(wp_header, ch.w, ch.h);
243
0
    Properties properties(1);
244
0
    for (size_t y = 0; y < ch.h; y++) {
245
0
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
246
0
      for (size_t x = 0; x < ch.w; x++) {
247
0
        size_t offset = 0;
248
0
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
249
0
        pixel_type_w top = (y ? *(r + x - onerow) : left);
250
0
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
251
0
        pixel_type_w topright =
252
0
            (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top);
253
0
        pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
254
0
        pixel_type guess = wp_state.Predict</*compute_properties=*/true>(
255
0
            x, y, ch.w, top, left, topright, topleft, toptop, &properties,
256
0
            offset);
257
0
        size_t ctx = 0;
258
0
        for (int c : cutoffs) {
259
0
          ctx += (c >= properties[0]) ? 1 : 0;
260
0
        }
261
0
        pixel_type res = r[x] - guess;
262
0
        uint32_t token;
263
0
        uint32_t nbits;
264
0
        uint32_t bits;
265
0
        config.Encode(PackSigned(res), &token, &nbits, &bits);
266
0
        histo[ctx].Add(token);
267
0
        extra_bits += nbits;
268
0
        wp_state.UpdateErrors(r[x], x, y, ch.w);
269
0
      }
270
0
    }
271
0
    for (auto& h : histo) {
272
0
      histo_cost += h.ShannonEntropy();
273
0
      h.Clear();
274
0
    }
275
0
  }
276
0
  return histo_cost + extra_bits;
277
0
}
278
279
bool do_transform(Image& image, const Transform& tr,
280
                  const weighted::Header& wp_header,
281
11
                  jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) {
282
11
  Transform t = tr;
283
11
  bool did_it = true;
284
11
  if (force_jxlart) {
285
0
    if (!t.MetaApply(image)) return false;
286
11
  } else {
287
11
    did_it = TransformForward(t, image, wp_header, pool);
288
11
  }
289
11
  if (did_it) image.transform.push_back(t);
290
11
  return did_it;
291
11
}
292
293
StatusOr<bool> maybe_do_transform(Image& image, const Transform& tr,
294
                                  const CompressParams& cparams,
295
                                  const weighted::Header& wp_header,
296
                                  float cost_before,
297
                                  jxl::ThreadPool* pool = nullptr,
298
11
                                  bool force_jxlart = false) {
299
11
  if (force_jxlart || cparams.speed_tier >= SpeedTier::kSquirrel) {
300
11
    return do_transform(image, tr, wp_header, pool, force_jxlart);
301
11
  }
302
0
  bool did_it = do_transform(image, tr, wp_header, pool);
303
0
  if (did_it) {
304
0
    JXL_ASSIGN_OR_RETURN(float cost_after, EstimateCost(image));
305
0
    JXL_DEBUG_V(7, "Cost before: %f  cost after: %f", cost_before, cost_after);
306
0
    if (cost_after > cost_before) {
307
0
      Transform t = image.transform.back();
308
0
      if (!t.Inverse(image, wp_header, pool)) {
309
0
        return false;
310
0
      }
311
0
      image.transform.pop_back();
312
0
      did_it = false;
313
0
    }
314
0
  }
315
0
  return did_it;
316
0
}
317
318
Status try_palettes(Image& gi, int& max_bitdepth, int& maxval,
319
                    const CompressParams& cparams_,
320
                    float channel_colors_percent,
321
73
                    jxl::ThreadPool* pool = nullptr) {
322
73
  float cost_before = 0.f;
323
73
  size_t did_palette = 0;
324
73
  float nb_pixels = gi.channel[0].w * gi.channel[0].h;
325
73
  int nb_chans = gi.channel.size() - gi.nb_meta_channels;
326
  // arbitrary estimate: 4.8 bpp for 8-bit RGB
327
73
  float arbitrary_bpp_estimate = 0.2f * gi.bitdepth * nb_chans;
328
329
73
  if (cparams_.palette_colors != 0 || cparams_.lossy_palette) {
330
    // when not estimating, assume some arbitrary bpp
331
11
    if (cparams_.speed_tier <= SpeedTier::kSquirrel) {
332
11
      JXL_ASSIGN_OR_RETURN(cost_before, EstimateCost(gi));
333
11
    } else {
334
0
      cost_before = nb_pixels * arbitrary_bpp_estimate;
335
0
    }
336
    // all-channel palette (e.g. RGBA)
337
11
    if (nb_chans > 1) {
338
0
      Transform maybe_palette(TransformId::kPalette);
339
0
      maybe_palette.begin_c = gi.nb_meta_channels;
340
0
      maybe_palette.num_c = nb_chans;
341
      // Heuristic choice of max colors for a palette:
342
      // max_colors = nb_pixels * estimated_bpp_without_palette * 0.0005 +
343
      //              + nb_pixels / 128 + 128
344
      //       (estimated_bpp_without_palette = cost_before / nb_pixels)
345
      // Rationale: small image with large palette is not effective;
346
      // also if the entropy (estimated bpp) is low (e.g. mostly solid/gradient
347
      // areas), palette is less useful and may even be counterproductive.
348
0
      maybe_palette.nb_colors = std::min(
349
0
          static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128),
350
0
          std::abs(cparams_.palette_colors));
351
0
      maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
352
0
      maybe_palette.lossy_palette =
353
0
          (cparams_.lossy_palette && maybe_palette.num_c == 3);
354
0
      if (maybe_palette.lossy_palette) {
355
0
        maybe_palette.predictor = Predictor::Average4;
356
0
      }
357
      // TODO(veluca): use a custom weighted header if using the weighted
358
      // predictor.
359
0
      JXL_ASSIGN_OR_RETURN(
360
0
          did_palette,
361
0
          maybe_do_transform(gi, maybe_palette, cparams_, weighted::Header(),
362
0
                             cost_before, pool, cparams_.options.zero_tokens));
363
0
    }
364
    // all-minus-one-channel palette (RGB with separate alpha, or CMY with
365
    // separate K)
366
11
    if (!did_palette && nb_chans > 3) {
367
0
      Transform maybe_palette_3(TransformId::kPalette);
368
0
      maybe_palette_3.begin_c = gi.nb_meta_channels;
369
0
      maybe_palette_3.num_c = nb_chans - 1;
370
0
      maybe_palette_3.nb_colors = std::min(
371
0
          static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128),
372
0
          std::abs(cparams_.palette_colors));
373
0
      maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0;
374
0
      maybe_palette_3.lossy_palette = cparams_.lossy_palette;
375
0
      if (maybe_palette_3.lossy_palette) {
376
0
        maybe_palette_3.predictor = Predictor::Average4;
377
0
      }
378
0
      JXL_ASSIGN_OR_RETURN(
379
0
          did_palette,
380
0
          maybe_do_transform(gi, maybe_palette_3, cparams_, weighted::Header(),
381
0
                             cost_before, pool, cparams_.options.zero_tokens));
382
0
    }
383
11
  }
384
385
73
  if (channel_colors_percent > 0) {
386
    // single channel palette (like FLIF's ChannelCompact)
387
11
    size_t nb_channels = gi.channel.size() - gi.nb_meta_channels - did_palette;
388
11
    int orig_bitdepth = max_bitdepth;
389
11
    max_bitdepth = 0;
390
11
    if (nb_channels > 0 && (did_palette || cost_before == 0)) {
391
9
      if (cparams_.speed_tier < SpeedTier::kSquirrel) {
392
0
        JXL_ASSIGN_OR_RETURN(cost_before, EstimateCost(gi));
393
9
      } else {
394
9
        cost_before = 0;
395
9
      }
396
9
    }
397
22
    for (size_t i = did_palette; i < nb_channels + did_palette; i++) {
398
11
      int32_t min;
399
11
      int32_t max;
400
11
      compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
401
11
      int64_t colors = static_cast<int64_t>(max) - min + 1;
402
11
      JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max);
403
11
      Transform maybe_palette_1(TransformId::kPalette);
404
11
      maybe_palette_1.begin_c = i + gi.nb_meta_channels;
405
11
      maybe_palette_1.num_c = 1;
406
      // simple heuristic: if less than X percent of the values in the range
407
      // actually occur, it is probably worth it to do a compaction
408
      // (but only if the channel palette is less than 6% the size of the
409
      // image itself)
410
11
      maybe_palette_1.nb_colors =
411
11
          std::min(static_cast<int>(nb_pixels / 16),
412
11
                   static_cast<int>(channel_colors_percent / 100. * colors));
413
11
      JXL_ASSIGN_OR_RETURN(
414
11
          bool did_ch_palette,
415
11
          maybe_do_transform(gi, maybe_palette_1, cparams_, weighted::Header(),
416
11
                             cost_before, pool));
417
11
      if (did_ch_palette) {
418
        // effective bit depth is lower, adjust quantization accordingly
419
1
        compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
420
1
        if (max < maxval) maxval = max;
421
1
        int ch_bitdepth =
422
1
            (max > 0 ? CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0);
423
1
        if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth;
424
10
      } else {
425
10
        max_bitdepth = orig_bitdepth;
426
10
      }
427
11
    }
428
11
  }
429
73
  return true;
430
73
}
431
432
}  // namespace
433
434
StatusOr<std::unique_ptr<ModularFrameEncoder>> ModularFrameEncoder::Create(
435
    JxlMemoryManager* memory_manager, const FrameHeader& frame_header,
436
224
    const CompressParams& cparams_orig, bool streaming_mode) {
437
224
  auto self = std::unique_ptr<ModularFrameEncoder>(
438
224
      new ModularFrameEncoder(memory_manager));
439
224
  JXL_RETURN_IF_ERROR(self->Init(frame_header, cparams_orig, streaming_mode));
440
224
  return self;
441
224
}
442
443
ModularFrameEncoder::ModularFrameEncoder(JxlMemoryManager* memory_manager)
444
224
    : memory_manager_(memory_manager) {}
445
446
Status ModularFrameEncoder::Init(const FrameHeader& frame_header,
447
                                 const CompressParams& cparams_orig,
448
224
                                 bool streaming_mode) {
449
224
  frame_dim_ = frame_header.ToFrameDimensions();
450
224
  cparams_ = cparams_orig;
451
452
224
  size_t num_streams =
453
224
      ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes);
454
455
  // Progressive lossless only benefits from levels 2 and higher
456
  // Lower levels of faster decoding can outperform higher tiers
457
  // depending on the PC
458
224
  if (cparams_.responsive == 1 && cparams_.IsLossless() &&
459
224
      cparams_.decoding_speed_tier == 1) {
460
0
    cparams_.decoding_speed_tier = 2;
461
0
  }
462
224
  if (cparams_.responsive == 1 && cparams_.IsLossless()) {
463
    // RCT selection seems bugged with Squeeze, YCoCg works well.
464
0
    if (cparams_.colorspace < 0) {
465
0
      cparams_.colorspace = 6;
466
0
    }
467
0
  }
468
469
224
  if (cparams_.ModularPartIsLossless()) {
470
162
    const auto disable_wp = [this] () {
471
0
        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kNoWP;
472
0
        if (cparams_.options.predictor == Predictor::Weighted) {
473
          // Predictor::Best turns to Predictor::Gradient anyways.
474
0
          cparams_.options.predictor = Predictor::Gradient;
475
0
        }
476
0
    };
477
162
    switch (cparams_.decoding_speed_tier) {
478
162
      case 0:
479
162
        cparams_.options.fast_decode_multiplier = 1.001f;
480
162
        break;
481
0
      case 1:  // No Weighted predictor
482
0
        cparams_.options.fast_decode_multiplier = 1.005f;
483
0
        disable_wp();
484
0
        break;
485
0
      case 2: {  // No Weighted predictor and Group size 0 defined in
486
                 // enc_frame.cc
487
0
        cparams_.options.fast_decode_multiplier = 1.015f;
488
0
        disable_wp();
489
0
        break;
490
0
      }
491
0
      case 3: {  // Gradient only, Group size 0, and Fast MA tree
492
0
        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
493
0
        cparams_.options.predictor = Predictor::Gradient;
494
0
        break;
495
0
      }
496
0
      default: {  // Gradient only, Group size 0, and No MA tree
497
0
        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
498
0
        cparams_.options.predictor = Predictor::Gradient;
499
0
        cparams_.options.nb_repeats = 0;
500
        // Disabling MA Trees sometimes doesn't increase decode speed
501
        // depending on PC
502
0
        break;
503
0
      }
504
162
    }
505
162
  }
506
507
5.52k
  for (size_t i = 0; i < num_streams; ++i) {
508
5.30k
    stream_images_.emplace_back(memory_manager_);
509
5.30k
  }
510
511
  // use a sensible default if nothing explicit is specified:
512
  // Squeeze for lossy, no squeeze for lossless
513
224
  if (cparams_.responsive < 0) {
514
162
    if (cparams_.ModularPartIsLossless()) {
515
162
      cparams_.responsive = 0;
516
162
    } else {
517
0
      cparams_.responsive = 1;
518
0
    }
519
162
  }
520
521
224
  cparams_.options.splitting_heuristics_node_threshold =
522
224
      75 + 14 * static_cast<int>(cparams_.speed_tier) +
523
224
      10 * cparams_.decoding_speed_tier;
524
525
224
  {
526
    // Set properties.
527
224
    std::vector<uint32_t> prop_order;
528
224
    if (cparams_.responsive) {
529
      // Properties in order of their likelihood of being useful for Squeeze
530
      // residuals.
531
0
      prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3};
532
224
    } else {
533
      // Same, but for the non-Squeeze case.
534
224
      prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8};
535
      // if few groups, don't use group as a property
536
224
      if (num_streams < 30 && cparams_.speed_tier > SpeedTier::kTortoise &&
537
224
          cparams_orig.ModularPartIsLossless()) {
538
162
        prop_order.erase(prop_order.begin() + 1);
539
162
      }
540
224
    }
541
224
    int max_properties = std::min<int>(
542
224
        cparams_.options.max_properties,
543
224
        static_cast<int>(
544
224
            frame_header.nonserialized_metadata->m.num_extra_channels) +
545
224
            (frame_header.encoding == FrameEncoding::kModular ? 2 : -1));
546
224
    switch (cparams_.speed_tier) {
547
0
      case SpeedTier::kHare:
548
0
        cparams_.options.splitting_heuristics_properties.assign(
549
0
            prop_order.begin(), prop_order.begin() + 4);
550
0
        cparams_.options.max_property_values = 24;
551
0
        break;
552
0
      case SpeedTier::kWombat:
553
0
        cparams_.options.splitting_heuristics_properties.assign(
554
0
            prop_order.begin(), prop_order.begin() + 5);
555
0
        cparams_.options.max_property_values = 32;
556
0
        break;
557
224
      case SpeedTier::kSquirrel:
558
224
        cparams_.options.splitting_heuristics_properties.assign(
559
224
            prop_order.begin(), prop_order.begin() + 7);
560
224
        cparams_.options.max_property_values = 48;
561
224
        break;
562
0
      case SpeedTier::kKitten:
563
0
        cparams_.options.splitting_heuristics_properties.assign(
564
0
            prop_order.begin(), prop_order.begin() + 10);
565
0
        cparams_.options.max_property_values = 96;
566
0
        break;
567
0
      case SpeedTier::kGlacier:
568
0
      case SpeedTier::kTortoise:
569
0
        cparams_.options.splitting_heuristics_properties = prop_order;
570
0
        cparams_.options.max_property_values = 256;
571
0
        break;
572
0
      default:
573
0
        cparams_.options.splitting_heuristics_properties.assign(
574
0
            prop_order.begin(), prop_order.begin() + 3);
575
0
        cparams_.options.max_property_values = 16;
576
0
        break;
577
224
    }
578
224
    if (cparams_.speed_tier > SpeedTier::kTortoise) {
579
      // Gradient in previous channels.
580
224
      for (int i = 0; i < max_properties; i++) {
581
0
        cparams_.options.splitting_heuristics_properties.push_back(
582
0
            kNumNonrefProperties + i * 4 + 3);
583
0
      }
584
224
    } else {
585
      // All the extra properties in Tortoise mode.
586
0
      for (int i = 0; i < max_properties * 4; i++) {
587
0
        cparams_.options.splitting_heuristics_properties.push_back(
588
0
            kNumNonrefProperties + i);
589
0
      }
590
0
    }
591
224
  }
592
593
224
  if ((cparams_.options.predictor == Predictor::Average0 ||
594
224
       cparams_.options.predictor == Predictor::Average1 ||
595
224
       cparams_.options.predictor == Predictor::Average2 ||
596
224
       cparams_.options.predictor == Predictor::Average3 ||
597
224
       cparams_.options.predictor == Predictor::Average4 ||
598
224
       cparams_.options.predictor == Predictor::Weighted) &&
599
224
      !cparams_.ModularPartIsLossless()) {
600
    // Lossy + Average/Weighted predictors does not work, so switch to default
601
    // predictors.
602
0
    cparams_.options.predictor = kUndefinedPredictor;
603
0
  }
604
605
224
  if (cparams_.options.predictor == kUndefinedPredictor) {
606
    // no explicit predictor(s) given, set a good default
607
162
    if ((cparams_.speed_tier <= SpeedTier::kGlacier ||
608
162
         cparams_.modular_mode == false) &&
609
162
        cparams_.IsLossless() && cparams_.responsive == JXL_FALSE) {
610
      // TODO(veluca): allow all predictors that don't break residual
611
      // multipliers in lossy mode.
612
0
      cparams_.options.predictor = Predictor::Variable;
613
162
    } else if (cparams_.responsive || cparams_.lossy_palette) {
614
      // zero predictor for Squeeze residues and lossy palette indices
615
      // TODO: Try adding 'Squeezed' predictor set, with the most
616
      // common predictors used by Variable in squeezed images, including none.
617
0
      cparams_.options.predictor = Predictor::Zero;
618
162
    } else if (!cparams_.IsLossless()) {
619
      // If not responsive and lossy. TODO(veluca): use near_lossless instead?
620
162
      cparams_.options.predictor = Predictor::Gradient;
621
162
    } else if (cparams_.speed_tier < SpeedTier::kFalcon) {
622
      // try median and weighted predictor for anything else
623
0
      cparams_.options.predictor = Predictor::Best;
624
0
    } else if (cparams_.speed_tier == SpeedTier::kFalcon) {
625
      // just weighted predictor in falcon mode
626
0
      cparams_.options.predictor = Predictor::Weighted;
627
0
    } else if (cparams_.speed_tier > SpeedTier::kFalcon) {
628
      // just gradient predictor in thunder mode
629
0
      cparams_.options.predictor = Predictor::Gradient;
630
0
    }
631
162
  } else {
632
62
    if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero;
633
62
  }
634
224
  if (!cparams_.ModularPartIsLossless()) {
635
62
    if (cparams_.options.predictor == Predictor::Weighted ||
636
62
        cparams_.options.predictor == Predictor::Variable ||
637
62
        cparams_.options.predictor == Predictor::Best)
638
0
      cparams_.options.predictor = Predictor::Zero;
639
62
  }
640
224
  tree_splits_.push_back(0);
641
224
  if (cparams_.modular_mode == false) {
642
162
    JXL_ASSIGN_OR_RETURN(ModularStreamId qt0, ModularStreamId::QuantTable(0));
643
162
    cparams_.options.fast_decode_multiplier = 1.0f;
644
162
    tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_));
645
162
    tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_));
646
162
    tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_));
647
162
    tree_splits_.push_back(qt0.ID(frame_dim_));
648
162
    tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_));
649
162
    ac_metadata_size.resize(frame_dim_.num_dc_groups);
650
162
    extra_dc_precision.resize(frame_dim_.num_dc_groups);
651
162
  }
652
224
  tree_splits_.push_back(num_streams);
653
224
  cparams_.options.max_chan_size = frame_dim_.group_dim;
654
224
  cparams_.options.group_dim = frame_dim_.group_dim;
655
656
  // TODO(veluca): figure out how to use different predictor sets per channel.
657
224
  stream_options_.resize(num_streams, cparams_.options);
658
659
224
  stream_options_[0] = cparams_.options;
660
224
  if (cparams_.speed_tier == SpeedTier::kFalcon) {
661
0
    stream_options_[0].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
662
224
  } else if (cparams_.speed_tier == SpeedTier::kThunder) {
663
0
    stream_options_[0].tree_kind = ModularOptions::TreeKind::kGradientFixedDC;
664
0
  }
665
224
  stream_options_[0].histogram_params =
666
224
      HistogramParams::ForModular(cparams_, {}, streaming_mode);
667
224
  return true;
668
224
}
669
670
Status ModularFrameEncoder::ComputeEncodingData(
671
    const FrameHeader& frame_header, const ImageMetadata& metadata,
672
    Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels,
673
    const Rect& group_rect, const FrameDimensions& patch_dim,
674
    const Rect& frame_area_rect, PassesEncoderState* JXL_RESTRICT enc_state,
675
    const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out,
676
73
    bool do_color) {
677
73
  JxlMemoryManager* memory_manager = enc_state->memory_manager();
678
73
  JXL_DEBUG_V(6, "Computing modular encoding data for frame %s",
679
73
              frame_header.DebugString().c_str());
680
681
73
  bool groupwise = enc_state->streaming_mode;
682
683
73
  if (do_color && frame_header.loop_filter.gab && !groupwise) {
684
0
    float w = 0.9908511000000001f;
685
0
    float weights[3] = {w, w, w};
686
0
    JXL_RETURN_IF_ERROR(GaborishInverse(color, Rect(*color), weights, pool));
687
0
  }
688
689
73
  if (do_color && metadata.bit_depth.bits_per_sample <= 16 &&
690
73
      cparams_.speed_tier < SpeedTier::kCheetah &&
691
73
      cparams_.decoding_speed_tier < 2 && !groupwise) {
692
62
    JXL_RETURN_IF_ERROR(FindBestPatchDictionary(
693
62
        *color, enc_state, cms, nullptr, aux_out,
694
62
        cparams_.color_transform == ColorTransform::kXYB));
695
62
    JXL_RETURN_IF_ERROR(PatchDictionaryEncoder::SubtractFrom(
696
62
        enc_state->shared.image_features.patches, color));
697
62
  }
698
699
73
  if (cparams_.custom_splines.HasAny()) {
700
0
    PassesSharedState& shared = enc_state->shared;
701
0
    ImageFeatures& image_features = shared.image_features;
702
0
    image_features.splines = cparams_.custom_splines;
703
0
  }
704
705
  // Convert ImageBundle to modular Image object
706
73
  const size_t xsize = patch_dim.xsize;
707
73
  const size_t ysize = patch_dim.ysize;
708
709
73
  int nb_chans = 3;
710
73
  if (metadata.color_encoding.IsGray() &&
711
73
      cparams_.color_transform == ColorTransform::kNone) {
712
0
    nb_chans = 1;
713
0
  }
714
73
  if (!do_color) nb_chans = 0;
715
716
73
  nb_chans += extra_channels.size();
717
718
73
  bool fp = metadata.bit_depth.floating_point_sample &&
719
73
            cparams_.color_transform != ColorTransform::kXYB;
720
721
  // bits_per_sample is just metadata for XYB images.
722
73
  if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
723
73
      cparams_.color_transform != ColorTransform::kXYB) {
724
0
    if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
725
0
      return JXL_FAILURE("uint32_t not supported in enc_modular");
726
0
    } else if (metadata.bit_depth.bits_per_sample > 32) {
727
0
      return JXL_FAILURE("bits_per_sample > 32 not supported");
728
0
    }
729
0
  }
730
731
  // in the non-float case, there is an implicit 0 sign bit
732
73
  int max_bitdepth =
733
73
      do_color ? metadata.bit_depth.bits_per_sample + (fp ? 0 : 1) : 0;
734
73
  Image& gi = stream_images_[0];
735
73
  JXL_ASSIGN_OR_RETURN(
736
73
      gi, Image::Create(memory_manager, xsize, ysize,
737
73
                        metadata.bit_depth.bits_per_sample, nb_chans));
738
73
  int c = 0;
739
73
  if (cparams_.color_transform == ColorTransform::kXYB &&
740
73
      cparams_.modular_mode == true) {
741
62
    float enc_factors[3] = {65536.0f, 4096.0f, 4096.0f};
742
62
    if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) {
743
      // quantize XYB here and then treat it as a lossless image
744
62
      enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance);
745
62
      enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
746
62
      enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
747
62
      cparams_.butteraugli_distance = 0;
748
62
    }
749
62
    if (cparams_.manual_xyb_factors.size() == 3) {
750
0
      JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC(
751
0
          memory_manager, &enc_state->shared.matrices,
752
0
          cparams_.manual_xyb_factors.data()));
753
      // TODO(jon): update max_bitdepth in this case
754
62
    } else {
755
62
      JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC(
756
62
          memory_manager, &enc_state->shared.matrices, enc_factors));
757
62
      max_bitdepth = 12;
758
62
    }
759
62
  }
760
73
  pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0;
761
73
  if (do_color) {
762
248
    for (; c < 3; c++) {
763
186
      if (metadata.color_encoding.IsGray() &&
764
186
          cparams_.color_transform == ColorTransform::kNone &&
765
186
          c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0))
766
0
        continue;
767
186
      int c_out = c;
768
      // XYB is encoded as YX(B-Y)
769
186
      if (cparams_.color_transform == ColorTransform::kXYB && c < 2)
770
124
        c_out = 1 - c_out;
771
186
      double factor = maxval;
772
186
      if (cparams_.color_transform == ColorTransform::kXYB)
773
186
        factor = enc_state->shared.matrices.InvDCQuant(c);
774
186
      if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) {
775
62
        JXL_ENSURE(!fp);
776
4.15k
        for (size_t y = 0; y < ysize; ++y) {
777
4.09k
          const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
778
4.09k
          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
779
4.09k
          pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y);
780
351k
          for (size_t x = 0; x < xsize; ++x) {
781
            // TODO(eustas): check if std::roundf is appropriate
782
347k
            row_out[x] = row_in[x] * factor + 0.5f;
783
347k
            row_out[x] -= row_Y[x];
784
347k
          }
785
4.09k
        }
786
124
      } else {
787
124
        int bits = metadata.bit_depth.bits_per_sample;
788
124
        int exp_bits = metadata.bit_depth.exponent_bits_per_sample;
789
124
        gi.channel[c_out].hshift = frame_header.chroma_subsampling.HShift(c);
790
124
        gi.channel[c_out].vshift = frame_header.chroma_subsampling.VShift(c);
791
124
        size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift);
792
124
        size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift);
793
124
        JXL_RETURN_IF_ERROR(
794
124
            gi.channel[c_out].shrink(xsize_shifted, ysize_shifted));
795
124
        const auto process_row = [&](const int task,
796
8.18k
                                     const int thread) -> Status {
797
8.18k
          const size_t y = task;
798
8.18k
          const float* const JXL_RESTRICT row_in =
799
8.18k
              color->PlaneRow(c, y + group_rect.y0()) + group_rect.x0();
800
8.18k
          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
801
8.18k
          JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out, xsize_shifted, bits,
802
8.18k
                                           exp_bits, fp, factor));
803
8.18k
          return true;
804
8.18k
        };
805
124
        JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted,
806
124
                                      ThreadPool::NoInit, process_row,
807
124
                                      "float2int"));
808
124
      }
809
186
    }
810
62
    if (metadata.color_encoding.IsGray() &&
811
62
        cparams_.color_transform == ColorTransform::kNone)
812
0
      c = 1;
813
62
  }
814
815
84
  for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) {
816
11
    const ExtraChannelInfo& eci = metadata.extra_channel_info[ec];
817
11
    size_t ecups = frame_header.extra_channel_upsampling[ec];
818
11
    JXL_RETURN_IF_ERROR(
819
11
        gi.channel[c].shrink(DivCeil(patch_dim.xsize_upsampled, ecups),
820
11
                             DivCeil(patch_dim.ysize_upsampled, ecups)));
821
11
    gi.channel[c].hshift = gi.channel[c].vshift =
822
11
        CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
823
824
11
    int bits = eci.bit_depth.bits_per_sample;
825
11
    int exp_bits = eci.bit_depth.exponent_bits_per_sample;
826
11
    bool ec_fp = eci.bit_depth.floating_point_sample;
827
11
    double factor = (ec_fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1));
828
11
    if (bits + (ec_fp ? 0 : 1) > max_bitdepth) {
829
11
      max_bitdepth = bits + (ec_fp ? 0 : 1);
830
11
    }
831
515
    const auto process_row = [&](const int task, const int thread) -> Status {
832
515
      const size_t y = task;
833
515
      const float* const JXL_RESTRICT row_in =
834
515
          extra_channels[ec].Row(y + group_rect.y0()) + group_rect.x0();
835
515
      pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y);
836
515
      JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out,
837
515
                                       gi.channel[c].plane.xsize(), bits,
838
515
                                       exp_bits, ec_fp, factor));
839
515
      return true;
840
515
    };
841
11
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, gi.channel[c].plane.ysize(),
842
11
                                  ThreadPool::NoInit, process_row,
843
11
                                  "float2int"));
844
11
  }
845
73
  JXL_ENSURE(c == nb_chans);
846
847
73
  int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32);
848
73
  if (max_bitdepth > level_max_bitdepth) {
849
0
    return JXL_FAILURE(
850
0
        "Bitdepth too high for level %i (need %i bits, have only %i in this "
851
0
        "level)",
852
0
        cparams_.level, max_bitdepth, level_max_bitdepth);
853
0
  }
854
855
  // Set options and apply transformations
856
73
  if (!cparams_.ModularPartIsLossless()) {
857
62
    if (cparams_.palette_colors != 0) {
858
62
      JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms");
859
62
    }
860
62
    if (cparams_.color_transform == ColorTransform::kXYB) {
861
62
      cparams_.channel_colors_pre_transform_percent = 0;
862
62
    }
863
62
    cparams_.channel_colors_percent = 0;
864
62
    cparams_.palette_colors = 0;
865
62
    cparams_.lossy_palette = false;
866
62
  }
867
868
  // Global palette transforms
869
73
  float channel_colors_percent = 0;
870
73
  if (!cparams_.lossy_palette &&
871
73
      (cparams_.speed_tier <= SpeedTier::kThunder ||
872
73
       (do_color && metadata.bit_depth.bits_per_sample > 8))) {
873
73
    channel_colors_percent = cparams_.channel_colors_pre_transform_percent;
874
73
  }
875
73
  if (!groupwise) {
876
73
    JXL_RETURN_IF_ERROR(try_palettes(gi, max_bitdepth, maxval, cparams_,
877
73
                                     channel_colors_percent, pool));
878
73
  }
879
880
  // don't do an RCT if we're short on bits
881
73
  if (cparams_.color_transform == ColorTransform::kNone && do_color &&
882
73
      gi.channel.size() - gi.nb_meta_channels >= 3 &&
883
73
      max_bitdepth + 1 < level_max_bitdepth) {
884
0
    if (cparams_.colorspace < 0 && (!cparams_.ModularPartIsLossless() ||
885
0
                                    cparams_.speed_tier > SpeedTier::kHare)) {
886
0
      Transform ycocg{TransformId::kRCT};
887
0
      ycocg.rct_type = 6;
888
0
      ycocg.begin_c = gi.nb_meta_channels;
889
0
      do_transform(gi, ycocg, weighted::Header(), pool);
890
0
      max_bitdepth++;
891
0
    } else if (cparams_.colorspace > 0) {
892
0
      Transform sg(TransformId::kRCT);
893
0
      sg.begin_c = gi.nb_meta_channels;
894
0
      sg.rct_type = cparams_.colorspace;
895
0
      do_transform(gi, sg, weighted::Header(), pool);
896
0
      max_bitdepth++;
897
0
    }
898
0
  }
899
900
73
  if (cparams_.move_to_front_from_channel > 0) {
901
0
    for (size_t tgt = 0;
902
0
         tgt + cparams_.move_to_front_from_channel < gi.channel.size(); tgt++) {
903
0
      size_t pos = cparams_.move_to_front_from_channel;
904
0
      while (pos > 0) {
905
0
        Transform move(TransformId::kRCT);
906
0
        if (pos == 1) {
907
0
          move.begin_c = tgt;
908
0
          move.rct_type = 28;  // RGB -> GRB
909
0
          pos -= 1;
910
0
        } else {
911
0
          move.begin_c = tgt + pos - 2;
912
0
          move.rct_type = 14;  // RGB -> BRG
913
0
          pos -= 2;
914
0
        }
915
0
        do_transform(gi, move, weighted::Header(), pool);
916
0
      }
917
0
    }
918
0
  }
919
920
  // don't do squeeze if we don't have some spare bits
921
73
  if (!groupwise && cparams_.responsive && !gi.channel.empty() &&
922
73
      max_bitdepth + 2 < level_max_bitdepth) {
923
0
    Transform t(TransformId::kSqueeze);
924
    // Check if default squeeze parameters are ok.
925
0
    std::vector<SqueezeParams> params;
926
0
    DefaultSqueezeParameters(&params, gi);
927
    // If image is smaller than group_dim, then default squeeze parameters
928
    // are not going too far. Else, channel size don't turn zero. Thus we only
929
    // check if tile does not go to zero-dim.
930
0
    size_t shift_cap = 7 + frame_header.group_size_shift;
931
0
    size_t hshift = 0;
932
0
    size_t vshift = 0;
933
0
    for (size_t i = 0; i < params.size(); ++i) {
934
0
      if (params[i].horizontal) {
935
0
        hshift++;
936
0
      } else {
937
0
        vshift++;
938
0
      }
939
0
      size_t dc_boost = (std::min(hshift, vshift) >= 3) ? 3 : 0;
940
      // In case we squeeze too much, truncate squeeze script.
941
0
      if (std::max(hshift, vshift) > shift_cap + dc_boost) {
942
0
        params.resize(i - 1);
943
0
        t.squeezes = params;
944
0
        break;
945
0
      }
946
0
    }
947
0
    do_transform(gi, t, weighted::Header(), pool);
948
0
    max_bitdepth += 2;
949
0
  }
950
951
73
  if (max_bitdepth + 1 > level_max_bitdepth) {
952
    // force no group RCTs if we don't have a spare bit
953
1
    cparams_.colorspace = 0;
954
1
  }
955
73
  JXL_ENSURE(max_bitdepth <= level_max_bitdepth);
956
957
73
  if (!cparams_.ModularPartIsLossless()) {
958
62
    quants_.resize(gi.channel.size(), 1);
959
62
    float quantizer = 0.25f;
960
62
    if (!cparams_.responsive) {
961
62
      JXL_DEBUG_V(1,
962
62
                  "Warning: lossy compression without Squeeze "
963
62
                  "transform is just color quantization.");
964
62
      quantizer *= 0.1f;
965
62
    }
966
62
    float bitdepth_correction = 1.f;
967
62
    if (cparams_.color_transform != ColorTransform::kXYB) {
968
0
      bitdepth_correction = maxval / 255.f;
969
0
    }
970
62
    std::vector<float> quantizers;
971
248
    for (size_t i = 0; i < 3; i++) {
972
186
      float dist = cparams_.butteraugli_distance;
973
186
      quantizers.push_back(quantizer * dist * bitdepth_correction);
974
186
    }
975
62
    for (size_t i = 0; i < extra_channels.size(); i++) {
976
0
      int ec_bitdepth =
977
0
          metadata.extra_channel_info[i].bit_depth.bits_per_sample;
978
0
      pixel_type ec_maxval = ec_bitdepth < 32 ? (1u << ec_bitdepth) - 1 : 0;
979
0
      bitdepth_correction = ec_maxval / 255.f;
980
0
      float dist = 0;
981
0
      if (i < cparams_.ec_distance.size()) dist = cparams_.ec_distance[i];
982
0
      if (dist < 0) dist = cparams_.butteraugli_distance;
983
0
      quantizers.push_back(quantizer * dist * bitdepth_correction);
984
0
    }
985
62
    if (cparams_.options.nb_repeats == 0) {
986
0
      return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!");
987
0
    }
988
248
    for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) {
989
186
      Channel& ch = gi.channel[i];
990
186
      int shift = ch.hshift + ch.vshift;  // number of pixel halvings
991
186
      if (shift > 16) shift = 16;
992
186
      if (shift > 0) shift--;
993
186
      int q;
994
      // assuming default Squeeze here
995
186
      int component =
996
186
          (do_color ? 0 : 3) + ((i - gi.nb_meta_channels) % nb_chans);
997
      // last 4 channels are final chroma residuals
998
186
      if (nb_chans > 2 && i >= gi.channel.size() - 4 && cparams_.responsive) {
999
0
        component = 1;
1000
0
      }
1001
186
      if (cparams_.color_transform == ColorTransform::kXYB && component < 3) {
1002
186
        q = quantizers[component] * squeeze_quality_factor_xyb *
1003
186
            squeeze_xyb_qtable[component][shift];
1004
186
      } else {
1005
0
        if (cparams_.colorspace != 0 && component > 0 && component < 3) {
1006
0
          q = quantizers[component] * squeeze_quality_factor *
1007
0
              squeeze_chroma_qtable[shift];
1008
0
        } else {
1009
0
          q = quantizers[component] * squeeze_quality_factor *
1010
0
              squeeze_luma_factor * squeeze_luma_qtable[shift];
1011
0
        }
1012
0
      }
1013
186
      if (q < 1) q = 1;
1014
186
      QuantizeChannel(gi.channel[i], q);
1015
186
      quants_[i] = q;
1016
186
    }
1017
62
  }
1018
1019
  // Fill other groups.
1020
  // DC
1021
146
  for (size_t group_id = 0; group_id < patch_dim.num_dc_groups; group_id++) {
1022
73
    const size_t rgx = group_id % patch_dim.xsize_dc_groups;
1023
73
    const size_t rgy = group_id / patch_dim.xsize_dc_groups;
1024
73
    const Rect rect(rgx * patch_dim.dc_group_dim, rgy * patch_dim.dc_group_dim,
1025
73
                    patch_dim.dc_group_dim, patch_dim.dc_group_dim);
1026
73
    size_t gx = rgx + frame_area_rect.x0() / 2048;
1027
73
    size_t gy = rgy + frame_area_rect.y0() / 2048;
1028
73
    size_t real_group_id = gy * frame_dim_.xsize_dc_groups + gx;
1029
    // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim
1030
    // maxShift==1000 is infinity
1031
73
    stream_params_.push_back(
1032
73
        GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(real_group_id)});
1033
73
  }
1034
  // AC global -> nothing.
1035
  // AC
1036
149
  for (size_t group_id = 0; group_id < patch_dim.num_groups; group_id++) {
1037
76
    const size_t rgx = group_id % patch_dim.xsize_groups;
1038
76
    const size_t rgy = group_id / patch_dim.xsize_groups;
1039
76
    const Rect mrect(rgx * patch_dim.group_dim, rgy * patch_dim.group_dim,
1040
76
                     patch_dim.group_dim, patch_dim.group_dim);
1041
76
    size_t gx = rgx + frame_area_rect.x0() / (frame_dim_.group_dim);
1042
76
    size_t gy = rgy + frame_area_rect.y0() / (frame_dim_.group_dim);
1043
76
    size_t real_group_id = gy * frame_dim_.xsize_groups + gx;
1044
152
    for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses();
1045
76
         i++) {
1046
76
      int maxShift;
1047
76
      int minShift;
1048
76
      frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift);
1049
76
      stream_params_.push_back(
1050
76
          GroupParams{mrect, minShift, maxShift,
1051
76
                      ModularStreamId::ModularAC(real_group_id, i)});
1052
76
    }
1053
76
  }
1054
  // if there's only one group, everything ends up in GlobalModular
1055
  // in that case, also try RCTs/WP params for the one group
1056
73
  if (stream_params_.size() == 2) {
1057
72
    stream_params_.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000,
1058
72
                                         ModularStreamId::Global()});
1059
72
  }
1060
73
  gi_channel_.resize(stream_images_.size());
1061
1062
73
  const auto process_row = [&](const uint32_t i,
1063
221
                               size_t /* thread */) -> Status {
1064
221
    size_t stream = stream_params_[i].id.ID(frame_dim_);
1065
221
    if (stream != 0) {
1066
149
      stream_options_[stream] = stream_options_[0];
1067
149
    }
1068
221
    JXL_RETURN_IF_ERROR(PrepareStreamParams(
1069
221
        stream_params_[i].rect, cparams_, stream_params_[i].minShift,
1070
221
        stream_params_[i].maxShift, stream_params_[i].id, do_color, groupwise));
1071
221
    return true;
1072
221
  };
1073
73
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, stream_params_.size(),
1074
73
                                ThreadPool::NoInit, process_row,
1075
73
                                "ChooseParams"));
1076
73
  {
1077
    // Clear out channels that have been copied to groups.
1078
73
    Image& full_image = stream_images_[0];
1079
73
    size_t ch = full_image.nb_meta_channels;
1080
269
    for (; ch < full_image.channel.size(); ch++) {
1081
197
      Channel& fc = full_image.channel[ch];
1082
197
      if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
1083
197
    }
1084
74
    for (; ch < full_image.channel.size(); ch++) {
1085
      // TODO(eustas): shrink / assign channel to keep size consistency
1086
1
      full_image.channel[ch].plane = ImageI();
1087
1
    }
1088
73
  }
1089
1090
73
  JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0]));
1091
73
  return true;
1092
73
}
1093
1094
224
Status ModularFrameEncoder::ComputeTree(ThreadPool* pool) {
1095
224
  std::vector<ModularMultiplierInfo> multiplier_info;
1096
224
  if (!quants_.empty()) {
1097
1.42k
    for (uint32_t stream_id = 0; stream_id < stream_images_.size();
1098
1.36k
         stream_id++) {
1099
      // skip non-modular stream_ids
1100
1.36k
      if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
1101
62
      const Image& image = stream_images_[stream_id];
1102
62
      const ModularOptions& options = stream_options_[stream_id];
1103
248
      for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) {
1104
186
        if (image.channel[i].w > options.max_chan_size ||
1105
186
            image.channel[i].h > options.max_chan_size) {
1106
0
          continue;
1107
0
        }
1108
186
        if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
1109
186
        size_t ch_id = stream_id == 0
1110
186
                           ? i
1111
186
                           : gi_channel_[stream_id][i - image.nb_meta_channels];
1112
186
        uint32_t q = quants_[ch_id];
1113
        // Inform the tree splitting heuristics that each channel in each group
1114
        // used this quantization factor. This will produce a tree with the
1115
        // given multipliers.
1116
186
        if (multiplier_info.empty() ||
1117
186
            multiplier_info.back().range[1][0] != stream_id ||
1118
186
            multiplier_info.back().multiplier != q) {
1119
62
          StaticPropRange range;
1120
62
          range[0] = {{i, i + 1}};
1121
62
          range[1] = {{stream_id, stream_id + 1}};
1122
62
          multiplier_info.push_back({range, q});
1123
124
        } else {
1124
          // Previous channel in the same group had the same quantization
1125
          // factor. Don't provide two different ranges, as that creates
1126
          // unnecessary nodes.
1127
124
          multiplier_info.back().range[0][1] = i + 1;
1128
124
        }
1129
186
      }
1130
62
    }
1131
    // Merge group+channel settings that have the same channels and quantization
1132
    // factors, to avoid unnecessary nodes.
1133
62
    std::sort(multiplier_info.begin(), multiplier_info.end(),
1134
62
              [](ModularMultiplierInfo a, ModularMultiplierInfo b) {
1135
0
                return std::make_tuple(a.range, a.multiplier) <
1136
0
                       std::make_tuple(b.range, b.multiplier);
1137
0
              });
1138
62
    size_t new_num = 1;
1139
62
    for (size_t i = 1; i < multiplier_info.size(); i++) {
1140
0
      ModularMultiplierInfo& prev = multiplier_info[new_num - 1];
1141
0
      ModularMultiplierInfo& cur = multiplier_info[i];
1142
0
      if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier &&
1143
0
          prev.range[1][1] == cur.range[1][0]) {
1144
0
        prev.range[1][1] = cur.range[1][1];
1145
0
      } else {
1146
0
        multiplier_info[new_num++] = multiplier_info[i];
1147
0
      }
1148
0
    }
1149
62
    multiplier_info.resize(new_num);
1150
62
  }
1151
1152
224
  if (!cparams_.custom_fixed_tree.empty()) {
1153
0
    tree_ = cparams_.custom_fixed_tree;
1154
224
  } else if (cparams_.speed_tier < SpeedTier::kFalcon ||
1155
224
             !cparams_.modular_mode) {
1156
    // Avoid creating a tree with leaves that don't correspond to any pixels.
1157
224
    std::vector<size_t> useful_splits;
1158
224
    useful_splits.reserve(tree_splits_.size());
1159
1.25k
    for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) {
1160
1.03k
      bool has_pixels = false;
1161
1.03k
      size_t start = tree_splits_[chunk];
1162
1.03k
      size_t stop = tree_splits_[chunk + 1];
1163
6.33k
      for (size_t i = start; i < stop; i++) {
1164
5.30k
        if (!stream_images_[i].empty()) has_pixels = true;
1165
5.30k
      }
1166
1.03k
      if (has_pixels) {
1167
398
        useful_splits.push_back(tree_splits_[chunk]);
1168
398
      }
1169
1.03k
    }
1170
    // Don't do anything if modular mode does not have any pixels in this image
1171
224
    if (useful_splits.empty()) return true;
1172
224
    useful_splits.push_back(tree_splits_.back());
1173
1174
224
    std::vector<Tree> trees(useful_splits.size() - 1);
1175
224
    const auto process_chunk = [&](const uint32_t chunk,
1176
398
                                   size_t /* thread */) -> Status {
1177
      // TODO(veluca): parallelize more.
1178
398
      uint32_t start = useful_splits[chunk];
1179
398
      uint32_t stop = useful_splits[chunk + 1];
1180
398
      while (start < stop && stream_images_[start].empty()) ++start;
1181
5.15k
      while (start < stop && stream_images_[stop - 1].empty()) --stop;
1182
1183
398
      if (stream_options_[start].tree_kind ==
1184
398
          ModularOptions::TreeKind::kLearn) {
1185
74
        JXL_ASSIGN_OR_RETURN(
1186
74
            trees[chunk],
1187
74
            LearnTree(stream_images_.data(), stream_options_.data(), start,
1188
74
                      stop, multiplier_info));
1189
324
      } else {
1190
324
        size_t total_pixels = 0;
1191
648
        for (size_t i = start; i < stop; i++) {
1192
1.13k
          for (const Channel& ch : stream_images_[i].channel) {
1193
1.13k
            total_pixels += ch.w * ch.h;
1194
1.13k
          }
1195
324
        }
1196
324
        total_pixels = std::max<size_t>(total_pixels, 1);
1197
1198
324
        trees[chunk] = PredefinedTree(stream_options_[start].tree_kind,
1199
324
                                      total_pixels, 8, 0);
1200
324
      }
1201
398
      return true;
1202
398
    };
1203
224
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, useful_splits.size() - 1,
1204
224
                                  ThreadPool::NoInit, process_chunk,
1205
224
                                  "LearnTrees"));
1206
224
    tree_.clear();
1207
224
    JXL_RETURN_IF_ERROR(
1208
224
        MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_));
1209
224
  } else {
1210
    // Fixed tree.
1211
0
    size_t total_pixels = 0;
1212
0
    int max_bitdepth = 0;
1213
0
    for (const Image& img : stream_images_) {
1214
0
      max_bitdepth = std::max(max_bitdepth, img.bitdepth);
1215
0
      for (const Channel& ch : img.channel) {
1216
0
        total_pixels += ch.w * ch.h;
1217
0
      }
1218
0
    }
1219
0
    if (cparams_.speed_tier <= SpeedTier::kFalcon) {
1220
0
      tree_ = PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels,
1221
0
                             max_bitdepth, stream_options_[0].max_properties);
1222
0
    } else if (cparams_.speed_tier <= SpeedTier::kThunder) {
1223
0
      tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC,
1224
0
                             total_pixels, max_bitdepth,
1225
0
                             stream_options_[0].max_properties);
1226
0
    } else {
1227
0
      tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)};
1228
0
    }
1229
0
  }
1230
224
  tree_tokens_.resize(1);
1231
224
  tree_tokens_[0].clear();
1232
224
  Tree decoded_tree;
1233
224
  JXL_RETURN_IF_ERROR(TokenizeTree(tree_, tree_tokens_.data(), &decoded_tree));
1234
224
  JXL_ENSURE(tree_.size() == decoded_tree.size());
1235
224
  tree_ = std::move(decoded_tree);
1236
1237
  /* TODO(szabadka) Add text output callback to cparams
1238
  if (kPrintTree && WantDebugOutput(aux_out)) {
1239
    if (frame_header.dc_level > 0) {
1240
      PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" +
1241
                           std::to_string(frame_header.dc_level) + "_tree");
1242
    } else {
1243
      PrintTree(tree_, aux_out->debug_prefix + "/global_tree");
1244
    }
1245
  } */
1246
224
  return true;
1247
224
}
1248
1249
224
Status ModularFrameEncoder::ComputeTokens(ThreadPool* pool) {
1250
224
  size_t num_streams = stream_images_.size();
1251
224
  stream_headers_.resize(num_streams);
1252
224
  tokens_.resize(num_streams);
1253
224
  image_widths_.resize(num_streams);
1254
224
  const auto process_stream = [&](const uint32_t stream_id,
1255
5.30k
                                  size_t /* thread */) -> Status {
1256
5.30k
    tokens_[stream_id].clear();
1257
5.30k
    JXL_RETURN_IF_ERROR(
1258
5.30k
        ModularCompress(stream_images_[stream_id], stream_options_[stream_id],
1259
5.30k
                        stream_id, tree_, stream_headers_[stream_id],
1260
5.30k
                        tokens_[stream_id], &image_widths_[stream_id]));
1261
5.30k
    return true;
1262
5.30k
  };
1263
224
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_streams, ThreadPool::NoInit,
1264
224
                                process_stream, "ComputeTokens"));
1265
224
  return true;
1266
224
}
1267
1268
Status ModularFrameEncoder::EncodeGlobalInfo(bool streaming_mode,
1269
                                             BitWriter* writer,
1270
224
                                             AuxOut* aux_out) {
1271
224
  JxlMemoryManager* memory_manager = writer->memory_manager();
1272
224
  bool skip_rest = false;
1273
224
  JXL_RETURN_IF_ERROR(
1274
224
      writer->WithMaxBits(1, LayerType::ModularTree, aux_out, [&] {
1275
        // If we are using brotli, or not using modular mode.
1276
224
        if (tree_tokens_.empty() || tree_tokens_[0].empty()) {
1277
224
          writer->Write(1, 0);
1278
224
          skip_rest = true;
1279
224
        } else {
1280
224
          writer->Write(1, 1);
1281
224
        }
1282
224
        return true;
1283
224
      }));
1284
224
  if (skip_rest) return true;
1285
1286
  // Write tree
1287
224
  HistogramParams params =
1288
224
      HistogramParams::ForModular(cparams_, extra_dc_precision, streaming_mode);
1289
224
  {
1290
224
    EntropyEncodingData tree_code;
1291
224
    JXL_ASSIGN_OR_RETURN(
1292
224
        size_t cost, BuildAndEncodeHistograms(
1293
224
                         memory_manager, params, kNumTreeContexts, tree_tokens_,
1294
224
                         &tree_code, writer, LayerType::ModularTree, aux_out));
1295
224
    (void)cost;
1296
224
    JXL_RETURN_IF_ERROR(WriteTokens(tree_tokens_[0], tree_code, 0, writer,
1297
224
                                    LayerType::ModularTree, aux_out));
1298
224
  }
1299
224
  params.streaming_mode = streaming_mode;
1300
224
  params.add_missing_symbols = streaming_mode;
1301
224
  params.image_widths = image_widths_;
1302
  // Write histograms.
1303
224
  JXL_ASSIGN_OR_RETURN(
1304
224
      size_t cost, BuildAndEncodeHistograms(
1305
224
                       memory_manager, params, (tree_.size() + 1) / 2, tokens_,
1306
224
                       &code_, writer, LayerType::ModularGlobal, aux_out));
1307
224
  (void)cost;
1308
224
  return true;
1309
224
}
1310
1311
Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out,
1312
                                         LayerType layer,
1313
1.37k
                                         const ModularStreamId& stream) {
1314
1.37k
  size_t stream_id = stream.ID(frame_dim_);
1315
1.37k
  if (stream_images_[stream_id].channel.empty()) {
1316
972
    JXL_DEBUG_V(10, "Modular stream %" PRIuS " is empty.", stream_id);
1317
972
    return true;  // Image with no channels, header never gets decoded.
1318
972
  }
1319
401
  if (tokens_.empty()) {
1320
0
    JXL_RETURN_IF_ERROR(ModularGenericCompress(
1321
0
        stream_images_[stream_id], stream_options_[stream_id], *writer, aux_out,
1322
0
        layer, stream_id));
1323
401
  } else {
1324
401
    JXL_RETURN_IF_ERROR(
1325
401
        Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out));
1326
401
    JXL_RETURN_IF_ERROR(
1327
401
        WriteTokens(tokens_[stream_id], code_, 0, writer, layer, aux_out));
1328
401
  }
1329
401
  return true;
1330
401
}
1331
1332
0
void ModularFrameEncoder::ClearStreamData(const ModularStreamId& stream) {
1333
0
  size_t stream_id = stream.ID(frame_dim_);
1334
0
  Image empty_image(stream_images_[stream_id].memory_manager());
1335
0
  std::swap(stream_images_[stream_id], empty_image);
1336
0
}
1337
1338
0
void ModularFrameEncoder::ClearModularStreamData() {
1339
0
  for (const auto& group : stream_params_) {
1340
0
    ClearStreamData(group.id);
1341
0
  }
1342
0
  stream_params_.clear();
1343
0
}
1344
1345
size_t ModularFrameEncoder::ComputeStreamingAbsoluteAcGroupId(
1346
    size_t dc_group_id, size_t ac_group_id,
1347
0
    const FrameDimensions& patch_dim) const {
1348
0
  size_t dc_group_x = dc_group_id % frame_dim_.xsize_dc_groups;
1349
0
  size_t dc_group_y = dc_group_id / frame_dim_.xsize_dc_groups;
1350
0
  size_t ac_group_x = ac_group_id % patch_dim.xsize_groups;
1351
0
  size_t ac_group_y = ac_group_id / patch_dim.xsize_groups;
1352
0
  return (dc_group_x * 8 + ac_group_x) +
1353
0
         (dc_group_y * 8 + ac_group_y) * frame_dim_.xsize_groups;
1354
0
}
1355
1356
Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect,
1357
                                                const CompressParams& cparams,
1358
                                                int minShift, int maxShift,
1359
                                                const ModularStreamId& stream,
1360
221
                                                bool do_color, bool groupwise) {
1361
221
  size_t stream_id = stream.ID(frame_dim_);
1362
221
  if (stream_id == 0 && frame_dim_.num_groups != 1) {
1363
    // If we have multiple groups, then the stream with ID 0 holds the full
1364
    // image and we do not want to apply transforms or in general change the
1365
    // pixel values.
1366
0
    return true;
1367
0
  }
1368
221
  Image& full_image = stream_images_[0];
1369
221
  JxlMemoryManager* memory_manager = full_image.memory_manager();
1370
221
  const size_t xsize = rect.xsize();
1371
221
  const size_t ysize = rect.ysize();
1372
221
  Image& gi = stream_images_[stream_id];
1373
221
  if (stream_id > 0) {
1374
149
    JXL_ASSIGN_OR_RETURN(gi, Image::Create(memory_manager, xsize, ysize,
1375
149
                                           full_image.bitdepth, 0));
1376
    // start at the first bigger-than-frame_dim.group_dim non-metachannel
1377
149
    size_t c = full_image.nb_meta_channels;
1378
149
    if (!groupwise) {
1379
541
      for (; c < full_image.channel.size(); c++) {
1380
397
        Channel& fc = full_image.channel[c];
1381
397
        if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
1382
397
      }
1383
149
    }
1384
154
    for (; c < full_image.channel.size(); c++) {
1385
5
      Channel& fc = full_image.channel[c];
1386
5
      int shift = std::min(fc.hshift, fc.vshift);
1387
5
      if (shift > maxShift) continue;
1388
5
      if (shift < minShift) continue;
1389
4
      Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
1390
4
             rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
1391
4
      if (r.xsize() == 0 || r.ysize() == 0) continue;
1392
4
      gi_channel_[stream_id].push_back(c);
1393
4
      JXL_ASSIGN_OR_RETURN(
1394
4
          Channel gc, Channel::Create(memory_manager, r.xsize(), r.ysize()));
1395
4
      gc.hshift = fc.hshift;
1396
4
      gc.vshift = fc.vshift;
1397
518
      for (size_t y = 0; y < r.ysize(); ++y) {
1398
514
        memcpy(gc.Row(y), r.ConstRow(fc.plane, y),
1399
514
               r.xsize() * sizeof(pixel_type));
1400
514
      }
1401
4
      gi.channel.emplace_back(std::move(gc));
1402
4
    }
1403
1404
149
    if (gi.channel.empty()) return true;
1405
    // Do some per-group transforms
1406
1407
    // Local palette transforms
1408
    // TODO(veluca): make this work with quantize-after-prediction in lossy
1409
    // mode.
1410
4
    if (cparams.butteraugli_distance == 0.f && !cparams.lossy_palette &&
1411
4
        cparams.speed_tier < SpeedTier::kCheetah) {
1412
0
      int max_bitdepth = 0, maxval = 0;  // don't care about that here
1413
0
      float channel_color_percent = 0;
1414
0
      if (!(cparams.responsive &&
1415
0
            (cparams.decoding_speed_tier >= 1 || cparams.IsLossless()))) {
1416
0
        channel_color_percent = cparams.channel_colors_percent;
1417
0
      }
1418
0
      JXL_RETURN_IF_ERROR(try_palettes(gi, max_bitdepth, maxval, cparams,
1419
0
                                       channel_color_percent));
1420
0
    }
1421
4
  }
1422
1423
  // lossless and no specific color transform specified: try Nothing, YCoCg,
1424
  // and 17 RCTs
1425
76
  if (cparams.color_transform == ColorTransform::kNone &&
1426
76
      cparams.IsLossless() && cparams.colorspace < 0 &&
1427
76
      gi.channel.size() - gi.nb_meta_channels >= 3 &&
1428
76
      cparams.responsive == JXL_FALSE && do_color &&
1429
76
      cparams.speed_tier <= SpeedTier::kHare) {
1430
0
    size_t nb_rcts_to_try = 0;
1431
0
    switch (cparams.speed_tier) {
1432
0
      case SpeedTier::kLightning:
1433
0
      case SpeedTier::kThunder:
1434
0
      case SpeedTier::kFalcon:
1435
0
      case SpeedTier::kCheetah:
1436
0
        nb_rcts_to_try = 0;  // Just do global YCoCg
1437
0
        break;
1438
0
      case SpeedTier::kHare:
1439
0
        nb_rcts_to_try = 4;
1440
0
        break;
1441
0
      case SpeedTier::kWombat:
1442
0
        nb_rcts_to_try = 5;
1443
0
        break;
1444
0
      case SpeedTier::kSquirrel:
1445
0
        nb_rcts_to_try = 7;
1446
0
        break;
1447
0
      case SpeedTier::kKitten:
1448
0
        nb_rcts_to_try = 9;
1449
0
        break;
1450
0
      case SpeedTier::kTectonicPlate:
1451
0
      case SpeedTier::kGlacier:
1452
0
      case SpeedTier::kTortoise:
1453
0
        nb_rcts_to_try = 19;
1454
0
        break;
1455
0
    }
1456
0
    float best_cost = std::numeric_limits<float>::max();
1457
0
    size_t best_rct = 0;
1458
0
    bool need_to_restore = (nb_rcts_to_try > 1);
1459
0
    std::vector<Channel> orig;
1460
0
    orig.reserve(3);
1461
    // These should be 19 actually different transforms; the remaining ones
1462
    // are equivalent to one of these (note that the first two are do-nothing
1463
    // and YCoCg) modulo channel reordering (which only matters in the case of
1464
    // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR)
1465
0
    for (int rct_type : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5,
1466
0
                         5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4,
1467
0
                         1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4,
1468
0
                         4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) {
1469
0
      if (nb_rcts_to_try == 0) break;
1470
0
      nb_rcts_to_try--;
1471
      // no-op rct_type; use as baseline cost
1472
0
      if (rct_type == 0) {
1473
0
        JXL_ASSIGN_OR_RETURN(best_cost, EstimateCost(gi));
1474
0
        for (size_t c = 0; c < 3; ++c) {
1475
0
          Channel& genuine = gi.channel[gi.nb_meta_channels + c];
1476
0
          JXL_ASSIGN_OR_RETURN(
1477
0
              Channel ch,
1478
0
              Channel::Create(genuine.memory_manager(), genuine.w, genuine.h,
1479
0
                              genuine.hshift, genuine.vshift));
1480
0
          orig.emplace_back(std::move(ch));
1481
0
          genuine.plane.Swap(orig[c].plane);
1482
0
        }
1483
0
      } else {
1484
0
        std::array<const Channel*, 3> in = {&orig[0], &orig[1], &orig[2]};
1485
0
        std::array<Channel*, 3> out = {&gi.channel[gi.nb_meta_channels + 0],
1486
0
                                       &gi.channel[gi.nb_meta_channels + 1],
1487
0
                                       &gi.channel[gi.nb_meta_channels + 2]};
1488
0
        JXL_RETURN_IF_ERROR(FwdRct(in, out, rct_type, /* pool */ nullptr));
1489
0
        JXL_ASSIGN_OR_RETURN(float cost, EstimateCost(gi));
1490
0
        if (cost < best_cost) {
1491
0
          best_rct = rct_type;
1492
0
          best_cost = cost;
1493
0
        }
1494
0
      }
1495
0
    }
1496
0
    if (need_to_restore) {
1497
0
      for (size_t c = 0; c < 3; ++c) {
1498
0
        gi.channel[gi.nb_meta_channels + c].plane.Swap(orig[c].plane);
1499
0
      }
1500
0
    }
1501
    // Apply the best RCT to the image for future encoding.
1502
0
    if (best_rct != 0) {
1503
0
      Transform sg(TransformId::kRCT);
1504
0
      sg.begin_c = gi.nb_meta_channels;
1505
0
      sg.rct_type = best_rct;
1506
0
      do_transform(gi, sg, weighted::Header());
1507
0
    }
1508
76
  } else {
1509
    // No need to try anything, just use the default options.
1510
76
  }
1511
76
  size_t nb_wp_modes = 1;
1512
76
  if (cparams.speed_tier <= SpeedTier::kTortoise) {
1513
0
    nb_wp_modes = 5;
1514
76
  } else if (cparams.speed_tier <= SpeedTier::kKitten) {
1515
0
    nb_wp_modes = 2;
1516
0
  }
1517
76
  if (nb_wp_modes > 1 &&
1518
76
      (stream_options_[stream_id].predictor == Predictor::Weighted ||
1519
0
       stream_options_[stream_id].predictor == Predictor::Best ||
1520
0
       stream_options_[stream_id].predictor == Predictor::Variable)) {
1521
0
    float best_cost = std::numeric_limits<float>::max();
1522
0
    stream_options_[stream_id].wp_mode = 0;
1523
0
    for (size_t i = 0; i < nb_wp_modes; i++) {
1524
0
      float cost = EstimateWPCost(gi, i);
1525
0
      if (cost < best_cost) {
1526
0
        best_cost = cost;
1527
0
        stream_options_[stream_id].wp_mode = i;
1528
0
      }
1529
0
    }
1530
0
  }
1531
76
  return true;
1532
76
}
1533
1534
constexpr float q_deadzone = 0.62f;
1535
int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y,
1536
               size_t w, weighted::State* wp_state, float value,
1537
1.05M
               float inv_factor) {
1538
1.05M
  float svalue = value * inv_factor;
1539
1.05M
  PredictionResult pred =
1540
1.05M
      PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state);
1541
1.05M
  svalue -= pred.guess;
1542
1.05M
  if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
1543
1.05M
  int residual = std::round(svalue);
1544
1.05M
  if (residual > 2 || residual < -2) residual = std::round(svalue * 0.5f) * 2;
1545
1.05M
  return residual + pred.guess;
1546
1.05M
}
1547
1548
int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x,
1549
0
                     size_t y, size_t w, float value, float inv_factor) {
1550
0
  float svalue = value * inv_factor;
1551
0
  PredictionResult pred =
1552
0
      PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient);
1553
0
  svalue -= pred.guess;
1554
0
  if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
1555
0
  int residual = std::round(svalue);
1556
0
  if (residual > 2 || residual < -2) residual = std::round(svalue * 0.5f) * 2;
1557
0
  return residual + pred.guess;
1558
0
}
1559
1560
Status ModularFrameEncoder::AddVarDCTDC(const FrameHeader& frame_header,
1561
                                        const Image3F& dc, const Rect& r,
1562
                                        size_t group_index, bool nl_dc,
1563
                                        PassesEncoderState* enc_state,
1564
162
                                        bool jpeg_transcode) {
1565
162
  JxlMemoryManager* memory_manager = dc.memory_manager();
1566
162
  extra_dc_precision[group_index] = nl_dc ? 1 : 0;
1567
162
  float mul = 1 << extra_dc_precision[group_index];
1568
1569
162
  size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_);
1570
162
  stream_options_[stream_id].max_chan_size = 0xFFFFFF;
1571
162
  stream_options_[stream_id].predictor = Predictor::Weighted;
1572
162
  stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
1573
162
  if (cparams_.speed_tier >= SpeedTier::kSquirrel) {
1574
162
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
1575
162
  }
1576
162
  if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) {
1577
0
    stream_options_[stream_id].predictor =
1578
0
        (cparams_.speed_tier < SpeedTier::kKitten ? Predictor::Variable
1579
0
                                                  : Predictor::Best);
1580
0
    stream_options_[stream_id].wp_tree_mode =
1581
0
        ModularOptions::TreeMode::kDefault;
1582
0
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
1583
0
  }
1584
162
  if (cparams_.decoding_speed_tier >= 1) {
1585
0
    stream_options_[stream_id].tree_kind =
1586
0
        ModularOptions::TreeKind::kGradientFixedDC;
1587
0
  }
1588
162
  stream_options_[stream_id].histogram_params =
1589
162
      stream_options_[0].histogram_params;
1590
1591
162
  JXL_ASSIGN_OR_RETURN(
1592
162
      stream_images_[stream_id],
1593
162
      Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 3));
1594
162
  const ColorCorrelation& color_correlation = enc_state->shared.cmap.base();
1595
162
  if (nl_dc && stream_options_[stream_id].tree_kind ==
1596
162
                   ModularOptions::TreeKind::kGradientFixedDC) {
1597
0
    JXL_ENSURE(frame_header.chroma_subsampling.Is444());
1598
0
    for (size_t c : {1, 0, 2}) {
1599
0
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1600
0
      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
1601
0
      float cfl_factor = color_correlation.DCFactors()[c];
1602
0
      for (size_t y = 0; y < r.ysize(); y++) {
1603
0
        int32_t* quant_row =
1604
0
            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
1605
0
        size_t stride = stream_images_[stream_id]
1606
0
                            .channel[c < 2 ? c ^ 1 : c]
1607
0
                            .plane.PixelsPerRow();
1608
0
        const float* row = r.ConstPlaneRow(dc, c, y);
1609
0
        if (c == 1) {
1610
0
          for (size_t x = 0; x < r.xsize(); x++) {
1611
0
            quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y,
1612
0
                                            r.xsize(), row[x], inv_factor);
1613
0
          }
1614
0
        } else {
1615
0
          int32_t* quant_row_y =
1616
0
              stream_images_[stream_id].channel[0].plane.Row(y);
1617
0
          for (size_t x = 0; x < r.xsize(); x++) {
1618
0
            quant_row[x] = QuantizeGradient(
1619
0
                quant_row, stride, c, x, y, r.xsize(),
1620
0
                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
1621
0
          }
1622
0
        }
1623
0
      }
1624
0
    }
1625
162
  } else if (nl_dc) {
1626
162
    JXL_ENSURE(frame_header.chroma_subsampling.Is444());
1627
486
    for (size_t c : {1, 0, 2}) {
1628
486
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1629
486
      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
1630
486
      float cfl_factor = color_correlation.DCFactors()[c];
1631
486
      weighted::Header header;
1632
486
      weighted::State wp_state(header, r.xsize(), r.ysize());
1633
20.6k
      for (size_t y = 0; y < r.ysize(); y++) {
1634
20.1k
        int32_t* quant_row =
1635
20.1k
            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
1636
20.1k
        size_t stride = stream_images_[stream_id]
1637
20.1k
                            .channel[c < 2 ? c ^ 1 : c]
1638
20.1k
                            .plane.PixelsPerRow();
1639
20.1k
        const float* row = r.ConstPlaneRow(dc, c, y);
1640
20.1k
        if (c == 1) {
1641
359k
          for (size_t x = 0; x < r.xsize(); x++) {
1642
353k
            quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(),
1643
353k
                                      &wp_state, row[x], inv_factor);
1644
353k
            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
1645
353k
          }
1646
13.4k
        } else {
1647
13.4k
          int32_t* quant_row_y =
1648
13.4k
              stream_images_[stream_id].channel[0].plane.Row(y);
1649
719k
          for (size_t x = 0; x < r.xsize(); x++) {
1650
706k
            quant_row[x] = QuantizeWP(
1651
706k
                quant_row, stride, c, x, y, r.xsize(), &wp_state,
1652
706k
                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
1653
706k
            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
1654
706k
          }
1655
13.4k
        }
1656
20.1k
      }
1657
486
    }
1658
162
  } else if (frame_header.chroma_subsampling.Is444()) {
1659
0
    for (size_t c : {1, 0, 2}) {
1660
0
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1661
0
      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
1662
0
      float cfl_factor = color_correlation.DCFactors()[c];
1663
0
      for (size_t y = 0; y < r.ysize(); y++) {
1664
0
        int32_t* quant_row =
1665
0
            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
1666
0
        const float* row = r.ConstPlaneRow(dc, c, y);
1667
0
        if (c == 1) {
1668
0
          for (size_t x = 0; x < r.xsize(); x++) {
1669
0
            quant_row[x] = std::round(row[x] * inv_factor);
1670
0
          }
1671
0
        } else {
1672
0
          int32_t* quant_row_y =
1673
0
              stream_images_[stream_id].channel[0].plane.Row(y);
1674
0
          for (size_t x = 0; x < r.xsize(); x++) {
1675
0
            quant_row[x] =
1676
0
                std::round((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) *
1677
0
                           inv_factor);
1678
0
          }
1679
0
        }
1680
0
      }
1681
0
    }
1682
0
  } else {
1683
0
    for (size_t c : {1, 0, 2}) {
1684
0
      Rect rect(r.x0() >> frame_header.chroma_subsampling.HShift(c),
1685
0
                r.y0() >> frame_header.chroma_subsampling.VShift(c),
1686
0
                r.xsize() >> frame_header.chroma_subsampling.HShift(c),
1687
0
                r.ysize() >> frame_header.chroma_subsampling.VShift(c));
1688
0
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1689
0
      size_t ys = rect.ysize();
1690
0
      size_t xs = rect.xsize();
1691
0
      Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c];
1692
0
      ch.w = xs;
1693
0
      ch.h = ys;
1694
0
      JXL_RETURN_IF_ERROR(ch.shrink());
1695
0
      for (size_t y = 0; y < ys; y++) {
1696
0
        int32_t* quant_row = ch.plane.Row(y);
1697
0
        const float* row = rect.ConstPlaneRow(dc, c, y);
1698
0
        for (size_t x = 0; x < xs; x++) {
1699
0
          quant_row[x] = std::round(row[x] * inv_factor);
1700
0
        }
1701
0
      }
1702
0
    }
1703
0
  }
1704
1705
162
  DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc,
1706
162
            stream_images_[stream_id], enc_state->shared.quantizer.MulDC(),
1707
162
            1.0 / mul, color_correlation.DCFactors(),
1708
162
            frame_header.chroma_subsampling, enc_state->shared.block_ctx_map);
1709
162
  return true;
1710
162
}
1711
1712
Status ModularFrameEncoder::AddACMetadata(const Rect& r, size_t group_index,
1713
                                          bool jpeg_transcode,
1714
162
                                          PassesEncoderState* enc_state) {
1715
162
  JxlMemoryManager* memory_manager = enc_state->memory_manager();
1716
162
  size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_);
1717
162
  stream_options_[stream_id].max_chan_size = 0xFFFFFF;
1718
162
  if (stream_options_[stream_id].predictor != Predictor::Weighted) {
1719
162
    stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP;
1720
162
  }
1721
162
  if (jpeg_transcode) {
1722
0
    stream_options_[stream_id].tree_kind =
1723
0
        ModularOptions::TreeKind::kJpegTranscodeACMeta;
1724
162
  } else if (cparams_.speed_tier >= SpeedTier::kFalcon) {
1725
0
    stream_options_[stream_id].tree_kind =
1726
0
        ModularOptions::TreeKind::kFalconACMeta;
1727
162
  } else if (cparams_.speed_tier > SpeedTier::kKitten) {
1728
162
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta;
1729
162
  }
1730
  // If we are using a non-constant CfL field, and are in a slow enough mode,
1731
  // re-enable tree computation for it.
1732
162
  if (cparams_.speed_tier < SpeedTier::kSquirrel &&
1733
162
      cparams_.force_cfl_jpeg_recompression) {
1734
0
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
1735
0
  }
1736
162
  stream_options_[stream_id].histogram_params =
1737
162
      stream_options_[0].histogram_params;
1738
  // YToX, YToB, ACS + QF, EPF
1739
162
  Image& image = stream_images_[stream_id];
1740
162
  JXL_ASSIGN_OR_RETURN(
1741
162
      image, Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 4));
1742
162
  static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
1743
162
  Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
1744
162
  JXL_ASSIGN_OR_RETURN(
1745
162
      image.channel[0],
1746
162
      Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3));
1747
162
  JXL_ASSIGN_OR_RETURN(
1748
162
      image.channel[1],
1749
162
      Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3));
1750
162
  JXL_ASSIGN_OR_RETURN(
1751
162
      image.channel[2],
1752
162
      Channel::Create(memory_manager, r.xsize() * r.ysize(), 2, 0, 0));
1753
162
  JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map,
1754
162
                                           Rect(image.channel[0].plane),
1755
162
                                           &image.channel[0].plane));
1756
162
  JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map,
1757
162
                                           Rect(image.channel[1].plane),
1758
162
                                           &image.channel[1].plane));
1759
162
  size_t num = 0;
1760
6.88k
  for (size_t y = 0; y < r.ysize(); y++) {
1761
6.72k
    AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y);
1762
6.72k
    const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y);
1763
6.72k
    const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y);
1764
6.72k
    int32_t* out_acs = image.channel[2].plane.Row(0);
1765
6.72k
    int32_t* out_qf = image.channel[2].plane.Row(1);
1766
6.72k
    int32_t* row_out_epf = image.channel[3].plane.Row(y);
1767
359k
    for (size_t x = 0; x < r.xsize(); x++) {
1768
353k
      row_out_epf[x] = row_epf[x];
1769
353k
      if (!row_acs[x].IsFirstBlock()) continue;
1770
181k
      out_acs[num] = row_acs[x].RawStrategy();
1771
181k
      out_qf[num] = row_qf[x] - 1;
1772
181k
      num++;
1773
181k
    }
1774
6.72k
  }
1775
162
  image.channel[2].w = num;
1776
162
  ac_metadata_size[group_index] = num;
1777
162
  return true;
1778
162
}
1779
1780
Status ModularFrameEncoder::EncodeQuantTable(
1781
    JxlMemoryManager* memory_manager, size_t size_x, size_t size_y,
1782
    BitWriter* writer, const QuantEncoding& encoding, size_t idx,
1783
0
    ModularFrameEncoder* modular_frame_encoder) {
1784
0
  JXL_ENSURE(encoding.qraw.qtable);
1785
0
  JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size());
1786
0
  JXL_ENSURE(idx < kNumQuantTables);
1787
0
  int* qtable = encoding.qraw.qtable->data();
1788
0
  JXL_RETURN_IF_ERROR(F16Coder::Write(encoding.qraw.qtable_den, writer));
1789
0
  if (modular_frame_encoder) {
1790
0
    JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx));
1791
0
    JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream(
1792
0
        writer, nullptr, LayerType::Header, qt));
1793
0
    return true;
1794
0
  }
1795
0
  JXL_ASSIGN_OR_RETURN(Image image,
1796
0
                       Image::Create(memory_manager, size_x, size_y, 8, 3));
1797
0
  for (size_t c = 0; c < 3; c++) {
1798
0
    for (size_t y = 0; y < size_y; y++) {
1799
0
      int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
1800
0
      for (size_t x = 0; x < size_x; x++) {
1801
0
        row[x] = qtable[c * size_x * size_y + y * size_x + x];
1802
0
      }
1803
0
    }
1804
0
  }
1805
0
  ModularOptions cfopts;
1806
0
  JXL_RETURN_IF_ERROR(ModularGenericCompress(image, cfopts, *writer));
1807
0
  return true;
1808
0
}
1809
1810
Status ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y,
1811
                                          const QuantEncoding& encoding,
1812
0
                                          size_t idx) {
1813
0
  JXL_ENSURE(idx < kNumQuantTables);
1814
0
  JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx));
1815
0
  size_t stream_id = qt.ID(frame_dim_);
1816
0
  JXL_ENSURE(encoding.qraw.qtable);
1817
0
  JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size());
1818
0
  int* qtable = encoding.qraw.qtable->data();
1819
0
  Image& image = stream_images_[stream_id];
1820
0
  JxlMemoryManager* memory_manager = image.memory_manager();
1821
0
  JXL_ASSIGN_OR_RETURN(image,
1822
0
                       Image::Create(memory_manager, size_x, size_y, 8, 3));
1823
0
  for (size_t c = 0; c < 3; c++) {
1824
0
    for (size_t y = 0; y < size_y; y++) {
1825
0
      int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
1826
0
      for (size_t x = 0; x < size_x; x++) {
1827
0
        row[x] = qtable[c * size_x * size_y + y * size_x + x];
1828
0
      }
1829
0
    }
1830
0
  }
1831
0
  return true;
1832
0
}
1833
}  // namespace jxl