Coverage Report

Created: 2026-02-14 07:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_modular.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular.h"
7
8
#include <jxl/cms_interface.h>
9
#include <jxl/memory_manager.h>
10
#include <jxl/types.h>
11
12
#include <algorithm>
13
#include <array>
14
#include <cmath>
15
#include <cstddef>
16
#include <cstdint>
17
#include <cstdlib>
18
#include <cstring>
19
#include <limits>
20
#include <memory>
21
#include <tuple>
22
#include <utility>
23
#include <vector>
24
25
#include "lib/jxl/ac_strategy.h"
26
#include "lib/jxl/base/bits.h"
27
#include "lib/jxl/base/common.h"
28
#include "lib/jxl/base/compiler_specific.h"
29
#include "lib/jxl/base/data_parallel.h"
30
#include "lib/jxl/base/printf_macros.h"
31
#include "lib/jxl/base/rect.h"
32
#include "lib/jxl/base/status.h"
33
#include "lib/jxl/chroma_from_luma.h"
34
#include "lib/jxl/common.h"
35
#include "lib/jxl/compressed_dc.h"
36
#include "lib/jxl/dec_ans.h"
37
#include "lib/jxl/dec_modular.h"
38
#include "lib/jxl/enc_ans.h"
39
#include "lib/jxl/enc_ans_params.h"
40
#include "lib/jxl/enc_aux_out.h"
41
#include "lib/jxl/enc_bit_writer.h"
42
#include "lib/jxl/enc_cache.h"
43
#include "lib/jxl/enc_fields.h"
44
#include "lib/jxl/enc_gaborish.h"
45
#include "lib/jxl/enc_modular_simd.h"
46
#include "lib/jxl/enc_params.h"
47
#include "lib/jxl/enc_patch_dictionary.h"
48
#include "lib/jxl/enc_quant_weights.h"
49
#include "lib/jxl/fields.h"
50
#include "lib/jxl/frame_dimensions.h"
51
#include "lib/jxl/frame_header.h"
52
#include "lib/jxl/image.h"
53
#include "lib/jxl/image_metadata.h"
54
#include "lib/jxl/image_ops.h"
55
#include "lib/jxl/memory_manager_internal.h"
56
#include "lib/jxl/modular/encoding/context_predict.h"
57
#include "lib/jxl/modular/encoding/dec_ma.h"
58
#include "lib/jxl/modular/encoding/enc_encoding.h"
59
#include "lib/jxl/modular/encoding/enc_ma.h"
60
#include "lib/jxl/modular/encoding/encoding.h"
61
#include "lib/jxl/modular/encoding/ma_common.h"
62
#include "lib/jxl/modular/modular_image.h"
63
#include "lib/jxl/modular/options.h"
64
#include "lib/jxl/modular/transform/enc_rct.h"
65
#include "lib/jxl/modular/transform/enc_transform.h"
66
#include "lib/jxl/modular/transform/squeeze.h"
67
#include "lib/jxl/modular/transform/squeeze_params.h"
68
#include "lib/jxl/modular/transform/transform.h"
69
#include "lib/jxl/pack_signed.h"
70
#include "lib/jxl/passes_state.h"
71
#include "lib/jxl/quant_weights.h"
72
#include "modular/options.h"
73
74
namespace jxl {
75
76
namespace {
77
// constexpr bool kPrintTree = false;
78
79
// Squeeze default quantization factors
80
// these quantization factors are for -Q 50  (other qualities simply scale the
81
// factors; things are rounded down and obviously cannot get below 1)
82
const float squeeze_quality_factor =
83
    0.35;  // for easy tweaking of the quality range (decrease this number for
84
           // higher quality)
85
const float squeeze_luma_factor =
86
    1.1;  // for easy tweaking of the balance between luma (or anything
87
          // non-chroma) and chroma (decrease this number for higher quality
88
          // luma)
89
const float squeeze_quality_factor_xyb = 4.0f;
90
const float squeeze_quality_factor_y = 1.5f;
91
92
const float squeeze_xyb_qtable[3][16] = {
93
    {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16,
94
     0.08, 0.04, 0.02, 0.01, 0.005},  // Y
95
    {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5,
96
     0.5},  // X
97
    {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5,
98
     0.5},  // B-Y
99
};
100
101
const float squeeze_luma_qtable[16] = {163.84, 81.92, 40.96, 20.48, 10.24, 5.12,
102
                                       2.56,   1.28,  0.64,  0.32,  0.16,  0.08,
103
                                       0.04,   0.02,  0.01,  0.005};
104
// for 8-bit input, the range of YCoCg chroma is -255..255 so basically this
105
// does 4:2:0 subsampling (two most fine grained layers get quantized away)
106
const float squeeze_chroma_qtable[16] = {
107
    1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
108
109
// Merges the trees in `trees` using nodes that decide on stream_id, as defined
110
// by `tree_splits`.
111
Status MergeTrees(const std::vector<Tree>& trees,
112
                  const std::vector<size_t>& tree_splits, size_t begin,
113
2.33k
                  size_t end, Tree* tree) {
114
2.33k
  JXL_ENSURE(trees.size() + 1 == tree_splits.size());
115
2.33k
  JXL_ENSURE(end > begin);
116
2.33k
  JXL_ENSURE(end <= trees.size());
117
2.33k
  if (end == begin + 1) {
118
    // Insert the tree, adding the opportune offset to all child nodes.
119
    // This will make the leaf IDs wrong, but subsequent roundtripping will fix
120
    // them.
121
1.70k
    size_t sz = tree->size();
122
1.70k
    tree->insert(tree->end(), trees[begin].begin(), trees[begin].end());
123
57.0k
    for (size_t i = sz; i < tree->size(); i++) {
124
55.3k
      (*tree)[i].lchild += sz;
125
55.3k
      (*tree)[i].rchild += sz;
126
55.3k
    }
127
1.70k
    return true;
128
1.70k
  }
129
633
  size_t mid = (begin + end) / 2;
130
633
  size_t splitval = tree_splits[mid] - 1;
131
633
  size_t cur = tree->size();
132
633
  tree->emplace_back(1 /*stream_id*/, static_cast<int>(splitval), 0, 0,
133
633
                     Predictor::Zero, 0, 1);
134
633
  (*tree)[cur].lchild = tree->size();
135
633
  JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, mid, end, tree));
136
633
  (*tree)[cur].rchild = tree->size();
137
633
  JXL_RETURN_IF_ERROR(MergeTrees(trees, tree_splits, begin, mid, tree));
138
633
  return true;
139
633
}
140
141
6.52k
void QuantizeChannel(Channel& ch, const int q) {
142
6.52k
  if (q == 1) return;
143
233k
  for (size_t y = 0; y < ch.plane.ysize(); y++) {
144
230k
    pixel_type* row = ch.plane.Row(y);
145
5.34M
    for (size_t x = 0; x < ch.plane.xsize(); x++) {
146
5.11M
      if (row[x] < 0) {
147
1.15M
        row[x] = -((-row[x] + q / 2) / q) * q;
148
3.96M
      } else {
149
3.96M
        row[x] = ((row[x] + q / 2) / q) * q;
150
3.96M
      }
151
5.11M
    }
152
230k
  }
153
2.76k
}
154
155
// convert binary32 float that corresponds to custom [bits]-bit float (with
156
// [exp_bits] exponent bits) to a [bits]-bit integer representation that should
157
// fit in pixel_type
158
Status float_to_int(const float* const row_in, pixel_type* const row_out,
159
                    size_t xsize, unsigned int bits, unsigned int exp_bits,
160
2.43M
                    bool fp, double dfactor) {
161
2.43M
  JXL_ENSURE(sizeof(pixel_type) * 8 >= bits);
162
2.50M
  if (!fp) {
163
2.50M
    if (bits > 22) {
164
0
      for (size_t x = 0; x < xsize; ++x) {
165
0
        row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5);
166
0
      }
167
2.50M
    } else {
168
2.50M
      float factor = dfactor;
169
111M
      for (size_t x = 0; x < xsize; ++x) {
170
108M
        row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f);
171
108M
      }
172
2.50M
    }
173
2.50M
    return true;
174
2.50M
  }
175
18.4E
  if (bits == 32 && fp) {
176
0
    JXL_ENSURE(exp_bits == 8);
177
0
    memcpy(static_cast<void*>(row_out), static_cast<const void*>(row_in),
178
0
           4 * xsize);
179
0
    return true;
180
0
  }
181
182
18.4E
  JXL_ENSURE(bits > 0);
183
18.4E
  int exp_bias = (1 << (exp_bits - 1)) - 1;
184
18.4E
  int max_exp = (1 << exp_bits) - 1;
185
18.4E
  uint32_t sign = (1u << (bits - 1));
186
18.4E
  int mant_bits = bits - exp_bits - 1;
187
18.4E
  int mant_shift = 23 - mant_bits;
188
18.4E
  for (size_t x = 0; x < xsize; ++x) {
189
0
    uint32_t f;
190
0
    memcpy(&f, &row_in[x], 4);
191
0
    int signbit = (f >> 31);
192
0
    f &= 0x7fffffff;
193
0
    if (f == 0) {
194
0
      row_out[x] = (signbit ? sign : 0);
195
0
      continue;
196
0
    }
197
0
    int exp = (f >> 23) - 127;
198
0
    int mantissa = (f & 0x007fffff);
199
    // broke up the binary32 into its parts, now reassemble into
200
    // arbitrary float
201
0
    if (exp == 128) {
202
      // NaN or infinity
203
0
      f = (signbit ? sign : 0);
204
0
      f |= ((1 << exp_bits) - 1) << mant_bits;
205
0
      f |= mantissa >> mant_shift;
206
0
      row_out[x] = static_cast<pixel_type>(f);
207
0
      continue;
208
0
    }
209
0
    exp += exp_bias;
210
0
    if (exp <= 0) {  // will become a subnormal number
211
      // add implicit leading 1 to mantissa
212
0
      mantissa |= 0x00800000;
213
0
      if (exp < -mant_bits) {
214
0
        return JXL_FAILURE(
215
0
            "Invalid float number: %g cannot be represented with %i "
216
0
            "exp_bits and %i mant_bits (exp %i)",
217
0
            row_in[x], exp_bits, mant_bits, exp);
218
0
      }
219
0
      mantissa >>= 1 - exp;
220
0
      exp = 0;
221
0
    }
222
    // exp should be representable in exp_bits, otherwise input was
223
    // invalid; max_exp is NaN or infinity
224
0
    if (exp >= max_exp) return JXL_FAILURE("Invalid float exponent");
225
0
    if (mantissa & ((1 << mant_shift) - 1)) {
226
0
      return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x],
227
0
                         mantissa);
228
0
    }
229
0
    mantissa >>= mant_shift;
230
0
    f = (signbit ? sign : 0);
231
0
    f |= (exp << mant_bits);
232
0
    f |= mantissa;
233
0
    row_out[x] = static_cast<pixel_type>(f);
234
0
  }
235
18.4E
  return true;
236
18.4E
}
237
238
352
float EstimateWPCost(const Image& img, size_t i) {
239
352
  size_t extra_bits = 0;
240
352
  float histo_cost = 0;
241
352
  HybridUintConfig config;
242
352
  int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31,
243
352
                       -23,  -15,  -11,  -7,   -4,   -3,  -1,  0,   1,
244
352
                       3,    5,    7,    11,   15,   23,  31,  47,  63,
245
352
                       95,   127,  191,  255,  392,  500};
246
352
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
247
352
  Histogram histo[nc] = {};
248
352
  weighted::Header wp_header;
249
352
  PredictorMode(i, &wp_header);
250
672
  for (const Channel& ch : img.channel) {
251
672
    const ptrdiff_t onerow = ch.plane.PixelsPerRow();
252
672
    weighted::State wp_state(wp_header, ch.w, ch.h);
253
672
    Properties properties(1);
254
20.4k
    for (size_t y = 0; y < ch.h; y++) {
255
19.7k
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
256
117k
      for (size_t x = 0; x < ch.w; x++) {
257
97.9k
        size_t offset = 0;
258
18.4E
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
259
97.9k
        pixel_type_w top = (y ? *(r + x - onerow) : left);
260
97.9k
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
261
97.9k
        pixel_type_w topright =
262
97.9k
            (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top);
263
97.9k
        pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
264
97.9k
        pixel_type guess = wp_state.Predict</*compute_properties=*/true>(
265
97.9k
            x, y, ch.w, top, left, topright, topleft, toptop, &properties,
266
97.9k
            offset);
267
97.9k
        size_t ctx = 0;
268
2.92M
        for (int c : cutoffs) {
269
2.92M
          ctx += (c >= properties[0]) ? 1 : 0;
270
2.92M
        }
271
97.9k
        pixel_type res = r[x] - guess;
272
97.9k
        uint32_t token;
273
97.9k
        uint32_t nbits;
274
97.9k
        uint32_t bits;
275
97.9k
        config.Encode(PackSigned(res), &token, &nbits, &bits);
276
97.9k
        histo[ctx].Add(token);
277
97.9k
        extra_bits += nbits;
278
97.9k
        wp_state.UpdateErrors(r[x], x, y, ch.w);
279
97.9k
      }
280
19.7k
    }
281
22.6k
    for (auto& h : histo) {
282
22.6k
      histo_cost += h.ShannonEntropy();
283
22.6k
      h.Clear();
284
22.6k
    }
285
672
  }
286
352
  return histo_cost + extra_bits;
287
352
}
288
289
bool do_transform(Image& image, const Transform& tr,
290
                  const weighted::Header& wp_header,
291
19.3k
                  jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) {
292
19.3k
  Transform t = tr;
293
19.3k
  bool did_it = true;
294
19.3k
  if (force_jxlart) {
295
0
    if (!t.MetaApply(image)) return false;
296
19.3k
  } else {
297
19.3k
    did_it = TransformForward(t, image, wp_header, pool);
298
19.3k
  }
299
19.3k
  if (did_it) image.transform.push_back(t);
300
19.3k
  return did_it;
301
19.3k
}
302
303
StatusOr<bool> maybe_do_transform(Image& image, const Transform& tr,
304
                                  const CompressParams& cparams,
305
                                  const weighted::Header& wp_header,
306
                                  float cost_before,
307
                                  jxl::ThreadPool* pool = nullptr,
308
18.3k
                                  bool force_jxlart = false) {
309
18.3k
  if (force_jxlart || cparams.speed_tier >= SpeedTier::kSquirrel) {
310
14.4k
    return do_transform(image, tr, wp_header, pool, force_jxlart);
311
14.4k
  }
312
3.90k
  bool did_it = do_transform(image, tr, wp_header, pool);
313
3.90k
  if (did_it) {
314
812
    JXL_ASSIGN_OR_RETURN(float cost_after, EstimateCost(image));
315
812
    JXL_DEBUG_V(7, "Cost before: %f  cost after: %f", cost_before, cost_after);
316
812
    if (cost_after > cost_before) {
317
333
      Transform t = image.transform.back();
318
333
      if (!t.Inverse(image, wp_header, pool)) {
319
0
        return false;
320
0
      }
321
333
      image.transform.pop_back();
322
333
      did_it = false;
323
333
    }
324
812
  }
325
3.90k
  return did_it;
326
3.90k
}
327
328
Status try_palettes(Image& gi, int& max_bitdepth, int& maxval,
329
                    const CompressParams& cparams_,
330
                    float channel_colors_percent,
331
11.1k
                    jxl::ThreadPool* pool = nullptr) {
332
11.1k
  float cost_before = 0.f;
333
11.1k
  size_t did_palette = 0;
334
11.1k
  float nb_pixels = gi.channel[0].w * gi.channel[0].h;
335
11.1k
  int nb_chans = gi.channel.size() - gi.nb_meta_channels;
336
  // arbitrary estimate: 4.8 bpp for 8-bit RGB
337
11.1k
  float arbitrary_bpp_estimate = 0.2f * gi.bitdepth * nb_chans;
338
339
11.1k
  if (cparams_.palette_colors != 0 || cparams_.lossy_palette) {
340
    // when not estimating, assume some arbitrary bpp
341
10.2k
    if (cparams_.speed_tier <= SpeedTier::kSquirrel) {
342
2.86k
      JXL_ASSIGN_OR_RETURN(cost_before, EstimateCost(gi));
343
7.42k
    } else {
344
7.42k
      cost_before = nb_pixels * arbitrary_bpp_estimate;
345
7.42k
    }
346
    // all-channel palette (e.g. RGBA)
347
10.2k
    if (nb_chans > 1) {
348
5.50k
      Transform maybe_palette(TransformId::kPalette);
349
5.50k
      maybe_palette.begin_c = gi.nb_meta_channels;
350
5.50k
      maybe_palette.num_c = nb_chans;
351
      // Heuristic choice of max colors for a palette:
352
      // max_colors = nb_pixels * estimated_bpp_without_palette * 0.0005 +
353
      //              + nb_pixels / 128 + 128
354
      //       (estimated_bpp_without_palette = cost_before / nb_pixels)
355
      // Rationale: small image with large palette is not effective;
356
      // also if the entropy (estimated bpp) is low (e.g. mostly solid/gradient
357
      // areas), palette is less useful and may even be counterproductive.
358
5.50k
      maybe_palette.nb_colors = std::min(
359
5.50k
          static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128),
360
5.50k
          std::abs(cparams_.palette_colors));
361
5.50k
      maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
362
5.50k
      maybe_palette.lossy_palette =
363
5.50k
          (cparams_.lossy_palette && maybe_palette.num_c == 3);
364
5.50k
      if (maybe_palette.lossy_palette) {
365
58
        maybe_palette.predictor = Predictor::Average4;
366
58
      }
367
      // TODO(veluca): use a custom weighted header if using the weighted
368
      // predictor.
369
5.50k
      JXL_ASSIGN_OR_RETURN(
370
5.50k
          did_palette,
371
5.50k
          maybe_do_transform(gi, maybe_palette, cparams_, weighted::Header(),
372
5.50k
                             cost_before, pool, cparams_.options.zero_tokens));
373
5.50k
    }
374
    // all-minus-one-channel palette (RGB with separate alpha, or CMY with
375
    // separate K)
376
10.2k
    if (!did_palette && nb_chans > 3) {
377
1.56k
      Transform maybe_palette_3(TransformId::kPalette);
378
1.56k
      maybe_palette_3.begin_c = gi.nb_meta_channels;
379
1.56k
      maybe_palette_3.num_c = nb_chans - 1;
380
1.56k
      maybe_palette_3.nb_colors = std::min(
381
1.56k
          static_cast<int>(cost_before * 0.0005f + nb_pixels / 128 + 128),
382
1.56k
          std::abs(cparams_.palette_colors));
383
1.56k
      maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0;
384
1.56k
      maybe_palette_3.lossy_palette = cparams_.lossy_palette;
385
1.56k
      if (maybe_palette_3.lossy_palette) {
386
4
        maybe_palette_3.predictor = Predictor::Average4;
387
4
      }
388
1.56k
      JXL_ASSIGN_OR_RETURN(
389
1.56k
          did_palette,
390
1.56k
          maybe_do_transform(gi, maybe_palette_3, cparams_, weighted::Header(),
391
1.56k
                             cost_before, pool, cparams_.options.zero_tokens));
392
1.56k
    }
393
10.2k
  }
394
395
11.1k
  if (channel_colors_percent > 0) {
396
    // single channel palette (like FLIF's ChannelCompact)
397
8.27k
    size_t nb_channels = gi.channel.size() - gi.nb_meta_channels - did_palette;
398
8.27k
    int orig_bitdepth = max_bitdepth;
399
8.27k
    max_bitdepth = 0;
400
8.27k
    if (nb_channels > 0 && (did_palette || cost_before == 0)) {
401
104
      if (cparams_.speed_tier < SpeedTier::kSquirrel) {
402
15
        JXL_ASSIGN_OR_RETURN(cost_before, EstimateCost(gi));
403
89
      } else {
404
89
        cost_before = 0;
405
89
      }
406
104
    }
407
19.6k
    for (size_t i = did_palette; i < nb_channels + did_palette; i++) {
408
11.3k
      int32_t min;
409
11.3k
      int32_t max;
410
11.3k
      compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
411
11.3k
      int64_t colors = static_cast<int64_t>(max) - min + 1;
412
11.3k
      JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max);
413
11.3k
      Transform maybe_palette_1(TransformId::kPalette);
414
11.3k
      maybe_palette_1.begin_c = i + gi.nb_meta_channels;
415
11.3k
      maybe_palette_1.num_c = 1;
416
      // simple heuristic: if less than X percent of the values in the range
417
      // actually occur, it is probably worth it to do a compaction
418
      // (but only if the channel palette is less than 6% the size of the
419
      // image itself)
420
11.3k
      maybe_palette_1.nb_colors =
421
11.3k
          std::min(static_cast<int>(nb_pixels / 16),
422
11.3k
                   static_cast<int>(channel_colors_percent / 100. * colors));
423
11.3k
      JXL_ASSIGN_OR_RETURN(
424
11.3k
          bool did_ch_palette,
425
11.3k
          maybe_do_transform(gi, maybe_palette_1, cparams_, weighted::Header(),
426
11.3k
                             cost_before, pool));
427
11.3k
      if (did_ch_palette) {
428
        // effective bit depth is lower, adjust quantization accordingly
429
2.07k
        compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
430
2.07k
        if (max < maxval) maxval = max;
431
2.07k
        int ch_bitdepth =
432
2.07k
            (max > 0 ? CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0);
433
2.07k
        if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth;
434
9.29k
      } else {
435
9.29k
        max_bitdepth = orig_bitdepth;
436
9.29k
      }
437
11.3k
    }
438
8.27k
  }
439
11.1k
  return true;
440
11.1k
}
441
442
}  // namespace
443
444
StatusOr<std::unique_ptr<ModularFrameEncoder>> ModularFrameEncoder::Create(
445
    JxlMemoryManager* memory_manager, const FrameHeader& frame_header,
446
2.54k
    const CompressParams& cparams_orig, bool streaming_mode) {
447
2.54k
  auto self = std::unique_ptr<ModularFrameEncoder>(
448
2.54k
      new ModularFrameEncoder(memory_manager));
449
2.54k
  JXL_RETURN_IF_ERROR(self->Init(frame_header, cparams_orig, streaming_mode));
450
2.54k
  return self;
451
2.54k
}
452
453
ModularFrameEncoder::ModularFrameEncoder(JxlMemoryManager* memory_manager)
454
2.54k
    : memory_manager_(memory_manager) {}
455
456
Status ModularFrameEncoder::Init(const FrameHeader& frame_header,
457
                                 const CompressParams& cparams_orig,
458
2.54k
                                 bool streaming_mode) {
459
2.54k
  frame_dim_ = frame_header.ToFrameDimensions();
460
2.54k
  cparams_ = cparams_orig;
461
462
2.54k
  size_t num_streams =
463
2.54k
      ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes);
464
465
  // Progressive lossless only benefits from levels 2 and higher
466
  // Lower levels of faster decoding can outperform higher tiers
467
  // depending on the PC
468
2.54k
  if (cparams_.responsive == 1 && cparams_.IsLossless() &&
469
70
      cparams_.decoding_speed_tier == 1) {
470
16
    cparams_.decoding_speed_tier = 2;
471
16
  }
472
2.54k
  if (cparams_.responsive == 1 && cparams_.IsLossless()) {
473
    // RCT selection seems bugged with Squeeze, YCoCg works well.
474
70
    if (cparams_.colorspace < 0) {
475
12
      cparams_.colorspace = 6;
476
12
    }
477
70
  }
478
479
2.54k
  if (cparams_.ModularPartIsLossless()) {
480
2.10k
    const auto disable_wp = [this] () {
481
463
        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kNoWP;
482
463
        if (cparams_.options.predictor == Predictor::Weighted) {
483
          // Predictor::Best turns to Predictor::Gradient anyways.
484
6
          cparams_.options.predictor = Predictor::Gradient;
485
6
        }
486
463
    };
487
2.10k
    switch (cparams_.decoding_speed_tier) {
488
1.24k
      case 0:
489
1.24k
        cparams_.options.fast_decode_multiplier = 1.001f;
490
1.24k
        break;
491
234
      case 1:  // No Weighted predictor
492
234
        cparams_.options.fast_decode_multiplier = 1.005f;
493
234
        disable_wp();
494
234
        break;
495
229
      case 2: {  // No Weighted predictor and Group size 0 defined in
496
                 // enc_frame.cc
497
229
        cparams_.options.fast_decode_multiplier = 1.015f;
498
229
        disable_wp();
499
229
        break;
500
0
      }
501
229
      case 3: {  // Gradient only, Group size 0, and Fast MA tree
502
229
        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
503
229
        cparams_.options.predictor = Predictor::Gradient;
504
229
        break;
505
0
      }
506
174
      default: {  // Gradient only, Group size 0, and No MA tree
507
174
        cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
508
174
        cparams_.options.predictor = Predictor::Gradient;
509
174
        cparams_.options.nb_repeats = 0;
510
        // Disabling MA Trees sometimes doesn't increase decode speed
511
        // depending on PC
512
174
        break;
513
0
      }
514
2.10k
    }
515
2.10k
  }
516
517
165k
  for (size_t i = 0; i < num_streams; ++i) {
518
162k
    stream_images_.emplace_back(memory_manager_);
519
162k
  }
520
521
  // use a sensible default if nothing explicit is specified:
522
  // Squeeze for lossy, no squeeze for lossless
523
2.54k
  if (cparams_.responsive < 0) {
524
1.33k
    if (cparams_.ModularPartIsLossless()) {
525
1.24k
      cparams_.responsive = 0;
526
1.24k
    } else {
527
92
      cparams_.responsive = 1;
528
92
    }
529
1.33k
  }
530
531
2.54k
  cparams_.options.splitting_heuristics_node_threshold =
532
2.54k
      75 + 14 * static_cast<int>(cparams_.speed_tier) +
533
2.54k
      10 * cparams_.decoding_speed_tier;
534
535
2.54k
  {
536
    // Set properties.
537
2.54k
    std::vector<uint32_t> prop_order;
538
2.54k
    if (cparams_.responsive) {
539
      // Properties in order of their likelihood of being useful for Squeeze
540
      // residuals.
541
634
      prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3};
542
1.90k
    } else {
543
      // Same, but for the non-Squeeze case.
544
1.90k
      prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8};
545
      // if few groups, don't use group as a property
546
1.90k
      if (num_streams < 30 && cparams_.speed_tier > SpeedTier::kTortoise &&
547
988
          cparams_orig.ModularPartIsLossless()) {
548
804
        prop_order.erase(prop_order.begin() + 1);
549
804
      }
550
1.90k
    }
551
2.54k
    int max_properties = std::min<int>(
552
2.54k
        cparams_.options.max_properties,
553
2.54k
        static_cast<int>(
554
2.54k
            frame_header.nonserialized_metadata->m.num_extra_channels) +
555
2.54k
            (frame_header.encoding == FrameEncoding::kModular ? 2 : -1));
556
2.54k
    switch (cparams_.speed_tier) {
557
64
      case SpeedTier::kHare:
558
64
        cparams_.options.splitting_heuristics_properties.assign(
559
64
            prop_order.begin(), prop_order.begin() + 4);
560
64
        cparams_.options.max_property_values = 48;
561
64
        cparams_.options.nb_repeats *= 0.5f;
562
64
        break;
563
186
      case SpeedTier::kWombat:
564
186
        cparams_.options.splitting_heuristics_properties.assign(
565
186
            prop_order.begin(), prop_order.begin() + 5);
566
186
        cparams_.options.max_property_values = 64;
567
186
        cparams_.options.nb_repeats *= 0.7f;
568
186
        break;
569
486
      case SpeedTier::kSquirrel:
570
486
        cparams_.options.splitting_heuristics_properties.assign(
571
486
            prop_order.begin(), prop_order.begin() + 7);
572
486
        cparams_.options.max_property_values = 96;
573
486
        break;
574
724
      case SpeedTier::kKitten:
575
724
        cparams_.options.splitting_heuristics_properties.assign(
576
724
            prop_order.begin(), prop_order.begin() + 10);
577
724
        cparams_.options.max_property_values = 128;
578
724
        cparams_.options.nb_repeats *= 1.1f;
579
724
        break;
580
0
      case SpeedTier::kGlacier:
581
430
      case SpeedTier::kTortoise:
582
430
        cparams_.options.splitting_heuristics_properties = prop_order;
583
430
        cparams_.options.max_property_values = 256;
584
430
        cparams_.options.nb_repeats *= 1.3f;
585
430
        break;
586
653
      default:
587
653
        cparams_.options.splitting_heuristics_properties.assign(
588
653
            prop_order.begin(), prop_order.begin() + 3);
589
653
        cparams_.options.max_property_values = 32;
590
653
        cparams_.options.nb_repeats *= 0.3f;
591
653
        break;
592
2.54k
    }
593
2.54k
    if (cparams_.speed_tier > SpeedTier::kTortoise) {
594
      // Gradient in previous channels.
595
3.13k
      for (int i = 0; i < max_properties; i++) {
596
1.01k
        cparams_.options.splitting_heuristics_properties.push_back(
597
1.01k
            kNumNonrefProperties + i * 4 + 3);
598
1.01k
      }
599
2.11k
    } else {
600
      // All the extra properties in Tortoise mode.
601
1.42k
      for (int i = 0; i < max_properties * 4; i++) {
602
992
        cparams_.options.splitting_heuristics_properties.push_back(
603
992
            kNumNonrefProperties + i);
604
992
      }
605
430
    }
606
2.54k
  }
607
0
  cparams_.options.nb_repeats = std::min(1.0f, cparams_.options.nb_repeats);
608
609
2.54k
  if ((cparams_.options.predictor == Predictor::Average0 ||
610
2.53k
       cparams_.options.predictor == Predictor::Average1 ||
611
2.49k
       cparams_.options.predictor == Predictor::Average2 ||
612
2.40k
       cparams_.options.predictor == Predictor::Average3 ||
613
2.26k
       cparams_.options.predictor == Predictor::Average4 ||
614
2.24k
       cparams_.options.predictor == Predictor::Weighted) &&
615
338
      !cparams_.ModularPartIsLossless()) {
616
    // Lossy + Average/Weighted predictors does not work, so switch to default
617
    // predictors.
618
26
    cparams_.options.predictor = kUndefinedPredictor;
619
26
  }
620
621
2.54k
  if (cparams_.options.predictor == kUndefinedPredictor) {
622
    // no explicit predictor(s) given, set a good default
623
918
    if ((cparams_.speed_tier <= SpeedTier::kGlacier ||
624
918
         cparams_.modular_mode == false) &&
625
646
        cparams_.IsLossless() && cparams_.responsive == JXL_FALSE) {
626
      // TODO(veluca): allow all predictors that don't break residual
627
      // multipliers in lossy mode.
628
0
      cparams_.options.predictor = Predictor::Variable;
629
918
    } else if (cparams_.responsive || cparams_.lossy_palette) {
630
      // zero predictor for Squeeze residues and lossy palette indices
631
      // TODO: Try adding 'Squeezed' predictor set, with the most
632
      // common predictors used by Variable in squeezed images, including none.
633
162
      cparams_.options.predictor = Predictor::Zero;
634
756
    } else if (!cparams_.IsLossless()) {
635
      // If not responsive and lossy. TODO(veluca): use near_lossless instead?
636
614
      cparams_.options.predictor = Predictor::Gradient;
637
614
    } else if (cparams_.speed_tier < SpeedTier::kFalcon) {
638
      // try median and weighted predictor for anything else
639
100
      cparams_.options.predictor = Predictor::Best;
640
100
    } else if (cparams_.speed_tier == SpeedTier::kFalcon) {
641
      // just weighted predictor in falcon mode
642
6
      cparams_.options.predictor = Predictor::Weighted;
643
36
    } else if (cparams_.speed_tier > SpeedTier::kFalcon) {
644
      // just gradient predictor in thunder mode
645
36
      cparams_.options.predictor = Predictor::Gradient;
646
36
    }
647
1.62k
  } else {
648
1.62k
    if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero;
649
1.62k
  }
650
2.54k
  if (!cparams_.ModularPartIsLossless()) {
651
436
    if (cparams_.options.predictor == Predictor::Weighted ||
652
436
        cparams_.options.predictor == Predictor::Variable ||
653
418
        cparams_.options.predictor == Predictor::Best)
654
18
      cparams_.options.predictor = Predictor::Zero;
655
436
  }
656
2.54k
  tree_splits_.push_back(0);
657
2.54k
  if (cparams_.modular_mode == false) {
658
1.63k
    JXL_ASSIGN_OR_RETURN(ModularStreamId qt0, ModularStreamId::QuantTable(0));
659
1.63k
    cparams_.options.fast_decode_multiplier = 1.0f;
660
1.63k
    tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_));
661
1.63k
    tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_));
662
1.63k
    tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_));
663
1.63k
    tree_splits_.push_back(qt0.ID(frame_dim_));
664
1.63k
    tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_));
665
1.63k
    ac_metadata_size.resize(frame_dim_.num_dc_groups);
666
1.63k
    extra_dc_precision.resize(frame_dim_.num_dc_groups);
667
1.63k
  }
668
2.54k
  tree_splits_.push_back(num_streams);
669
2.54k
  cparams_.options.max_chan_size = frame_dim_.group_dim;
670
2.54k
  cparams_.options.group_dim = frame_dim_.group_dim;
671
672
  // TODO(veluca): figure out how to use different predictor sets per channel.
673
2.54k
  stream_options_.resize(num_streams, cparams_.options);
674
675
2.54k
  stream_options_[0] = cparams_.options;
676
2.54k
  if (cparams_.speed_tier == SpeedTier::kFalcon) {
677
68
    stream_options_[0].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
678
2.47k
  } else if (cparams_.speed_tier == SpeedTier::kThunder) {
679
403
    stream_options_[0].tree_kind = ModularOptions::TreeKind::kGradientFixedDC;
680
403
  }
681
2.54k
  stream_options_[0].histogram_params =
682
2.54k
      HistogramParams::ForModular(cparams_, {}, streaming_mode);
683
2.54k
  return true;
684
2.54k
}
685
686
Status ModularFrameEncoder::ComputeEncodingData(
687
    const FrameHeader& frame_header, const ImageMetadata& metadata,
688
    Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels,
689
    const Rect& group_rect, const FrameDimensions& patch_dim,
690
    const Rect& frame_area_rect, PassesEncoderState* JXL_RESTRICT enc_state,
691
    const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out,
692
2.11k
    bool do_color) {
693
2.11k
  JxlMemoryManager* memory_manager = enc_state->memory_manager();
694
2.11k
  JXL_DEBUG_V(6, "Computing modular encoding data for frame %s",
695
2.11k
              frame_header.DebugString().c_str());
696
697
2.11k
  bool groupwise = enc_state->streaming_mode;
698
699
2.11k
  if (do_color && frame_header.loop_filter.gab && !groupwise) {
700
210
    float w = 0.9908511000000001f;
701
210
    float weights[3] = {w, w, w};
702
210
    JXL_RETURN_IF_ERROR(GaborishInverse(color, Rect(*color), weights, pool));
703
210
  }
704
705
2.11k
  if (do_color && metadata.bit_depth.bits_per_sample <= 16 &&
706
1.98k
      cparams_.speed_tier < SpeedTier::kCheetah &&
707
890
      cparams_.decoding_speed_tier < 2 && !groupwise) {
708
336
    JXL_RETURN_IF_ERROR(FindBestPatchDictionary(
709
336
        *color, enc_state, cms, nullptr, aux_out,
710
336
        cparams_.color_transform == ColorTransform::kXYB));
711
336
    JXL_RETURN_IF_ERROR(PatchDictionaryEncoder::SubtractFrom(
712
336
        enc_state->shared.image_features.patches, color));
713
336
  }
714
715
2.11k
  if (cparams_.custom_splines.HasAny()) {
716
0
    PassesSharedState& shared = enc_state->shared;
717
0
    ImageFeatures& image_features = shared.image_features;
718
0
    image_features.splines = cparams_.custom_splines;
719
0
  }
720
721
  // Convert ImageBundle to modular Image object
722
2.11k
  const size_t xsize = patch_dim.xsize;
723
2.11k
  const size_t ysize = patch_dim.ysize;
724
725
2.11k
  int nb_chans = 3;
726
2.11k
  if (metadata.color_encoding.IsGray() &&
727
660
      cparams_.color_transform == ColorTransform::kNone) {
728
516
    nb_chans = 1;
729
516
  }
730
2.11k
  if (!do_color) nb_chans = 0;
731
732
2.11k
  nb_chans += extra_channels.size();
733
734
2.11k
  bool fp = metadata.bit_depth.floating_point_sample &&
735
0
            cparams_.color_transform != ColorTransform::kXYB;
736
737
  // bits_per_sample is just metadata for XYB images.
738
2.11k
  if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
739
0
      cparams_.color_transform != ColorTransform::kXYB) {
740
0
    if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
741
0
      return JXL_FAILURE("uint32_t not supported in enc_modular");
742
0
    } else if (metadata.bit_depth.bits_per_sample > 32) {
743
0
      return JXL_FAILURE("bits_per_sample > 32 not supported");
744
0
    }
745
0
  }
746
747
  // in the non-float case, there is an implicit 0 sign bit
748
2.11k
  int max_bitdepth =
749
2.11k
      do_color ? metadata.bit_depth.bits_per_sample + (fp ? 0 : 1) : 0;
750
2.11k
  Image& gi = stream_images_[0];
751
2.11k
  JXL_ASSIGN_OR_RETURN(
752
2.11k
      gi, Image::Create(memory_manager, xsize, ysize,
753
2.11k
                        metadata.bit_depth.bits_per_sample, nb_chans));
754
2.11k
  int c = 0;
755
2.11k
  if (cparams_.color_transform == ColorTransform::kXYB &&
756
563
      cparams_.modular_mode == true) {
757
436
    float enc_factors[3] = {65536.0f, 4096.0f, 4096.0f};
758
436
    if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) {
759
      // quantize XYB here and then treat it as a lossless image
760
216
      enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance);
761
216
      enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
762
216
      enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
763
216
      cparams_.butteraugli_distance = 0;
764
216
    }
765
436
    if (cparams_.manual_xyb_factors.size() == 3) {
766
0
      JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC(
767
0
          memory_manager, &enc_state->shared.matrices,
768
0
          cparams_.manual_xyb_factors.data()));
769
      // TODO(jon): update max_bitdepth in this case
770
436
    } else {
771
436
      JXL_RETURN_IF_ERROR(DequantMatricesSetCustomDC(
772
436
          memory_manager, &enc_state->shared.matrices, enc_factors));
773
436
      max_bitdepth = 12;
774
436
    }
775
436
  }
776
2.11k
  pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0;
777
2.11k
  if (do_color) {
778
7.93k
    for (; c < 3; c++) {
779
5.95k
      if (metadata.color_encoding.IsGray() &&
780
1.80k
          cparams_.color_transform == ColorTransform::kNone &&
781
1.54k
          c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0))
782
1.03k
        continue;
783
4.92k
      int c_out = c;
784
      // XYB is encoded as YX(B-Y)
785
4.92k
      if (cparams_.color_transform == ColorTransform::kXYB && c < 2)
786
872
        c_out = 1 - c_out;
787
4.92k
      double factor = maxval;
788
4.92k
      if (cparams_.color_transform == ColorTransform::kXYB)
789
1.30k
        factor = enc_state->shared.matrices.InvDCQuant(c);
790
4.92k
      if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) {
791
436
        JXL_ENSURE(!fp);
792
97.0k
        for (size_t y = 0; y < ysize; ++y) {
793
96.5k
          const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
794
96.5k
          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
795
96.5k
          pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y);
796
5.25M
          for (size_t x = 0; x < xsize; ++x) {
797
            // TODO(eustas): check if std::roundf is appropriate
798
5.15M
            row_out[x] = row_in[x] * factor + 0.5f;
799
5.15M
            row_out[x] -= row_Y[x];
800
5.15M
          }
801
96.5k
        }
802
4.48k
      } else {
803
4.48k
        int bits = metadata.bit_depth.bits_per_sample;
804
4.48k
        int exp_bits = metadata.bit_depth.exponent_bits_per_sample;
805
4.48k
        gi.channel[c_out].hshift = frame_header.chroma_subsampling.HShift(c);
806
4.48k
        gi.channel[c_out].vshift = frame_header.chroma_subsampling.VShift(c);
807
4.48k
        size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift);
808
4.48k
        size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift);
809
4.48k
        JXL_RETURN_IF_ERROR(
810
4.48k
            gi.channel[c_out].shrink(xsize_shifted, ysize_shifted));
811
4.48k
        const auto process_row = [&](const int task,
812
2.20M
                                     const int thread) -> Status {
813
2.20M
          const size_t y = task;
814
2.20M
          const float* const JXL_RESTRICT row_in =
815
2.20M
              color->PlaneRow(c, y + group_rect.y0()) + group_rect.x0();
816
2.20M
          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
817
2.20M
          JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out, xsize_shifted, bits,
818
2.20M
                                           exp_bits, fp, factor));
819
2.20M
          return true;
820
2.20M
        };
821
4.48k
        JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, ysize_shifted,
822
4.48k
                                      ThreadPool::NoInit, process_row,
823
4.48k
                                      "float2int"));
824
4.48k
      }
825
4.92k
    }
826
1.98k
    if (metadata.color_encoding.IsGray() &&
827
600
        cparams_.color_transform == ColorTransform::kNone)
828
516
      c = 1;
829
1.98k
  }
830
831
2.94k
  for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) {
832
833
    const ExtraChannelInfo& eci = metadata.extra_channel_info[ec];
833
833
    size_t ecups = frame_header.extra_channel_upsampling[ec];
834
833
    JXL_RETURN_IF_ERROR(
835
833
        gi.channel[c].shrink(DivCeil(patch_dim.xsize_upsampled, ecups),
836
833
                             DivCeil(patch_dim.ysize_upsampled, ecups)));
837
833
    gi.channel[c].hshift = gi.channel[c].vshift =
838
833
        CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
839
840
833
    int bits = eci.bit_depth.bits_per_sample;
841
833
    int exp_bits = eci.bit_depth.exponent_bits_per_sample;
842
833
    bool ec_fp = eci.bit_depth.floating_point_sample;
843
833
    double factor = (ec_fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1));
844
833
    if (bits + (ec_fp ? 0 : 1) > max_bitdepth) {
845
145
      max_bitdepth = bits + (ec_fp ? 0 : 1);
846
145
    }
847
567k
    const auto process_row = [&](const int task, const int thread) -> Status {
848
567k
      const size_t y = task;
849
567k
      const float* const JXL_RESTRICT row_in =
850
567k
          extra_channels[ec].Row(y + group_rect.y0()) + group_rect.x0();
851
567k
      pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y);
852
567k
      JXL_RETURN_IF_ERROR(float_to_int(row_in, row_out,
853
567k
                                       gi.channel[c].plane.xsize(), bits,
854
567k
                                       exp_bits, ec_fp, factor));
855
567k
      return true;
856
567k
    };
857
833
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, gi.channel[c].plane.ysize(),
858
833
                                  ThreadPool::NoInit, process_row,
859
833
                                  "float2int"));
860
833
  }
861
2.11k
  JXL_ENSURE(c == nb_chans);
862
863
2.11k
  int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32);
864
2.11k
  if (max_bitdepth > level_max_bitdepth) {
865
0
    return JXL_FAILURE(
866
0
        "Bitdepth too high for level %i (need %i bits, have only %i in this "
867
0
        "level)",
868
0
        cparams_.level, max_bitdepth, level_max_bitdepth);
869
0
  }
870
871
  // Set options and apply transformations
872
2.11k
  if (!cparams_.ModularPartIsLossless()) {
873
436
    if (cparams_.palette_colors != 0) {
874
428
      JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms");
875
428
    }
876
436
    if (cparams_.color_transform == ColorTransform::kXYB) {
877
436
      cparams_.channel_colors_pre_transform_percent = 0;
878
436
    }
879
436
    cparams_.channel_colors_percent = 0;
880
436
    cparams_.palette_colors = 0;
881
436
    cparams_.lossy_palette = false;
882
436
  }
883
884
  // Global palette transforms
885
2.11k
  float channel_colors_percent = 0;
886
2.11k
  if (!cparams_.lossy_palette &&
887
2.00k
      (cparams_.speed_tier <= SpeedTier::kThunder ||
888
2.00k
       (do_color && metadata.bit_depth.bits_per_sample > 8))) {
889
2.00k
    channel_colors_percent = cparams_.channel_colors_pre_transform_percent;
890
2.00k
  }
891
2.11k
  if (!groupwise) {
892
916
    JXL_RETURN_IF_ERROR(try_palettes(gi, max_bitdepth, maxval, cparams_,
893
916
                                     channel_colors_percent, pool));
894
916
  }
895
896
  // don't do an RCT if we're short on bits
897
2.11k
  if (cparams_.color_transform == ColorTransform::kNone && do_color &&
898
1.54k
      gi.channel.size() - gi.nb_meta_channels >= 3 &&
899
813
      max_bitdepth + 1 < level_max_bitdepth) {
900
813
    if (cparams_.colorspace < 0 && (!cparams_.ModularPartIsLossless() ||
901
222
                                    cparams_.speed_tier > SpeedTier::kHare)) {
902
79
      Transform ycocg{TransformId::kRCT};
903
79
      ycocg.rct_type = 6;
904
79
      ycocg.begin_c = gi.nb_meta_channels;
905
79
      do_transform(gi, ycocg, weighted::Header(), pool);
906
79
      max_bitdepth++;
907
734
    } else if (cparams_.colorspace > 0) {
908
589
      Transform sg(TransformId::kRCT);
909
589
      sg.begin_c = gi.nb_meta_channels;
910
589
      sg.rct_type = cparams_.colorspace;
911
589
      do_transform(gi, sg, weighted::Header(), pool);
912
589
      max_bitdepth++;
913
589
    }
914
813
  }
915
916
2.11k
  if (cparams_.move_to_front_from_channel > 0) {
917
0
    for (size_t tgt = 0;
918
0
         tgt + cparams_.move_to_front_from_channel < gi.channel.size(); tgt++) {
919
0
      size_t pos = cparams_.move_to_front_from_channel;
920
0
      while (pos > 0) {
921
0
        Transform move(TransformId::kRCT);
922
0
        if (pos == 1) {
923
0
          move.begin_c = tgt;
924
0
          move.rct_type = 28;  // RGB -> GRB
925
0
          pos -= 1;
926
0
        } else {
927
0
          move.begin_c = tgt + pos - 2;
928
0
          move.rct_type = 14;  // RGB -> BRG
929
0
          pos -= 2;
930
0
        }
931
0
        do_transform(gi, move, weighted::Header(), pool);
932
0
      }
933
0
    }
934
0
  }
935
936
  // don't do squeeze if we don't have some spare bits
937
2.11k
  if (!groupwise && cparams_.responsive && !gi.channel.empty() &&
938
322
      max_bitdepth + 2 < level_max_bitdepth) {
939
322
    Transform t(TransformId::kSqueeze);
940
    // Check if default squeeze parameters are ok.
941
322
    std::vector<SqueezeParams> params;
942
322
    DefaultSqueezeParameters(&params, gi);
943
    // If image is smaller than group_dim, then default squeeze parameters
944
    // are not going too far. Else, channel size don't turn zero. Thus we only
945
    // check if tile does not go to zero-dim.
946
322
    size_t shift_cap = 7 + frame_header.group_size_shift;
947
322
    size_t hshift = 0;
948
322
    size_t vshift = 0;
949
2.84k
    for (size_t i = 0; i < params.size(); ++i) {
950
2.62k
      if (params[i].horizontal) {
951
1.69k
        hshift++;
952
1.69k
      } else {
953
932
        vshift++;
954
932
      }
955
2.62k
      size_t dc_boost = (std::min(hshift, vshift) >= 3) ? 3 : 0;
956
      // In case we squeeze too much, truncate squeeze script.
957
2.62k
      if (std::max(hshift, vshift) > shift_cap + dc_boost) {
958
106
        params.resize(i - 1);
959
106
        t.squeezes = params;
960
106
        break;
961
106
      }
962
2.62k
    }
963
322
    do_transform(gi, t, weighted::Header(), pool);
964
322
    max_bitdepth += 2;
965
322
  }
966
967
2.11k
  if (max_bitdepth + 1 > level_max_bitdepth) {
968
    // force no group RCTs if we don't have a spare bit
969
0
    cparams_.colorspace = 0;
970
0
  }
971
2.11k
  JXL_ENSURE(max_bitdepth <= level_max_bitdepth);
972
973
2.11k
  if (!cparams_.ModularPartIsLossless()) {
974
436
    quants_.resize(gi.channel.size(), 1);
975
436
    float quantizer = 0.25f;
976
436
    if (!cparams_.responsive) {
977
216
      JXL_DEBUG_V(1,
978
216
                  "Warning: lossy compression without Squeeze "
979
216
                  "transform is just color quantization.");
980
216
      quantizer *= 0.1f;
981
216
    }
982
436
    float bitdepth_correction = 1.f;
983
436
    if (cparams_.color_transform != ColorTransform::kXYB) {
984
0
      bitdepth_correction = maxval / 255.f;
985
0
    }
986
436
    std::vector<float> quantizers;
987
1.74k
    for (size_t i = 0; i < 3; i++) {
988
1.30k
      float dist = cparams_.butteraugli_distance;
989
1.30k
      quantizers.push_back(quantizer * powf(dist, 1.2) * bitdepth_correction);
990
1.30k
    }
991
520
    for (size_t i = 0; i < extra_channels.size(); i++) {
992
84
      int ec_bitdepth =
993
84
          metadata.extra_channel_info[i].bit_depth.bits_per_sample;
994
84
      pixel_type ec_maxval = ec_bitdepth < 32 ? (1u << ec_bitdepth) - 1 : 0;
995
84
      bitdepth_correction = ec_maxval / 255.f;
996
84
      float dist = 0;
997
84
      if (i < cparams_.ec_distance.size()) dist = cparams_.ec_distance[i];
998
84
      if (dist < 0) dist = cparams_.butteraugli_distance;
999
84
      quantizers.push_back(quantizer * dist * bitdepth_correction);
1000
84
    }
1001
436
    if (cparams_.options.nb_repeats == 0) {
1002
0
      return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!");
1003
0
    }
1004
6.95k
    for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) {
1005
6.52k
      Channel& ch = gi.channel[i];
1006
6.52k
      int shift = ch.hshift + ch.vshift;  // number of pixel halvings
1007
6.52k
      if (shift > 16) shift = 16;
1008
6.52k
      if (shift > 0) shift--;
1009
6.52k
      int component = (do_color ? 0 : 3) + ch.component;
1010
6.52k
      int q;
1011
6.52k
      if (cparams_.color_transform == ColorTransform::kXYB && component < 3) {
1012
6.02k
        q = quantizers[component] * squeeze_quality_factor_xyb *
1013
6.02k
            squeeze_xyb_qtable[component][shift];
1014
6.02k
        if (component == 0) q *= squeeze_quality_factor_y;
1015
6.02k
      } else {
1016
494
        if (cparams_.colorspace != 0 && component > 0 && component < 3) {
1017
0
          q = quantizers[component] * squeeze_quality_factor *
1018
0
              squeeze_chroma_qtable[shift];
1019
494
        } else {
1020
494
          q = quantizers[component] * squeeze_quality_factor *
1021
494
              squeeze_luma_factor * squeeze_luma_qtable[shift];
1022
494
        }
1023
494
      }
1024
6.52k
      if (q < 1) q = 1;
1025
6.52k
      QuantizeChannel(gi.channel[i], q);
1026
6.52k
      quants_[i] = q;
1027
6.52k
    }
1028
436
  }
1029
1030
  // Fill other groups.
1031
  // DC
1032
7.49k
  for (size_t group_id = 0; group_id < patch_dim.num_dc_groups; group_id++) {
1033
5.38k
    const size_t rgx = group_id % patch_dim.xsize_dc_groups;
1034
5.38k
    const size_t rgy = group_id / patch_dim.xsize_dc_groups;
1035
5.38k
    const Rect rect(rgx * patch_dim.dc_group_dim, rgy * patch_dim.dc_group_dim,
1036
5.38k
                    patch_dim.dc_group_dim, patch_dim.dc_group_dim);
1037
5.38k
    size_t gx = rgx + frame_area_rect.x0() / 2048;
1038
5.38k
    size_t gy = rgy + frame_area_rect.y0() / 2048;
1039
5.38k
    size_t real_group_id = gy * frame_dim_.xsize_dc_groups + gx;
1040
    // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim
1041
    // maxShift==1000 is infinity
1042
5.38k
    stream_params_.push_back(
1043
5.38k
        GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(real_group_id)});
1044
5.38k
  }
1045
  // AC global -> nothing.
1046
  // AC
1047
40.4k
  for (size_t group_id = 0; group_id < patch_dim.num_groups; group_id++) {
1048
38.3k
    const size_t rgx = group_id % patch_dim.xsize_groups;
1049
38.3k
    const size_t rgy = group_id / patch_dim.xsize_groups;
1050
38.3k
    const Rect mrect(rgx * patch_dim.group_dim, rgy * patch_dim.group_dim,
1051
38.3k
                     patch_dim.group_dim, patch_dim.group_dim);
1052
38.3k
    size_t gx = rgx + frame_area_rect.x0() / (frame_dim_.group_dim);
1053
38.3k
    size_t gy = rgy + frame_area_rect.y0() / (frame_dim_.group_dim);
1054
38.3k
    size_t real_group_id = gy * frame_dim_.xsize_groups + gx;
1055
92.6k
    for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses();
1056
54.3k
         i++) {
1057
54.3k
      int maxShift;
1058
54.3k
      int minShift;
1059
54.3k
      frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift);
1060
54.3k
      stream_params_.push_back(
1061
54.3k
          GroupParams{mrect, minShift, maxShift,
1062
54.3k
                      ModularStreamId::ModularAC(real_group_id, i)});
1063
54.3k
    }
1064
38.3k
  }
1065
  // if there's only one group, everything ends up in GlobalModular
1066
  // in that case, also try RCTs/WP params for the one group
1067
2.11k
  if (stream_params_.size() == 2) {
1068
362
    stream_params_.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000,
1069
362
                                         ModularStreamId::Global()});
1070
362
  }
1071
2.11k
  gi_channel_.resize(stream_images_.size());
1072
1073
2.11k
  const auto process_row = [&](const uint32_t i,
1074
59.7k
                               size_t /* thread */) -> Status {
1075
59.7k
    size_t stream = stream_params_[i].id.ID(frame_dim_);
1076
59.7k
    if (stream != 0) {
1077
59.3k
      stream_options_[stream] = stream_options_[0];
1078
59.3k
    }
1079
59.7k
    JXL_RETURN_IF_ERROR(PrepareStreamParams(
1080
59.7k
        stream_params_[i].rect, cparams_, stream_params_[i].minShift,
1081
59.7k
        stream_params_[i].maxShift, stream_params_[i].id, do_color, groupwise));
1082
59.7k
    return true;
1083
59.7k
  };
1084
2.11k
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, stream_params_.size(),
1085
2.11k
                                ThreadPool::NoInit, process_row,
1086
2.11k
                                "ChooseParams"));
1087
2.11k
  {
1088
    // Clear out channels that have been copied to groups.
1089
2.11k
    Image& full_image = stream_images_[0];
1090
2.11k
    size_t ch = full_image.nb_meta_channels;
1091
8.31k
    for (; ch < full_image.channel.size(); ch++) {
1092
7.81k
      Channel& fc = full_image.channel[ch];
1093
7.81k
      if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
1094
7.81k
    }
1095
7.44k
    for (; ch < full_image.channel.size(); ch++) {
1096
      // TODO(eustas): shrink / assign channel to keep size consistency
1097
5.33k
      full_image.channel[ch].plane = ImageI();
1098
5.33k
    }
1099
2.11k
  }
1100
1101
2.11k
  JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0]));
1102
2.11k
  return true;
1103
2.11k
}
1104
1105
1.19k
Status ModularFrameEncoder::ComputeTree(ThreadPool* pool) {
1106
1.19k
  std::vector<ModularMultiplierInfo> multiplier_info;
1107
1.19k
  if (!quants_.empty()) {
1108
18.4k
    for (uint32_t stream_id = 0; stream_id < stream_images_.size();
1109
17.9k
         stream_id++) {
1110
      // skip non-modular stream_ids
1111
17.9k
      if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
1112
5.29k
      const Image& image = stream_images_[stream_id];
1113
5.29k
      const ModularOptions& options = stream_options_[stream_id];
1114
67.8k
      for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) {
1115
62.5k
        if (image.channel[i].w > options.max_chan_size ||
1116
61.6k
            image.channel[i].h > options.max_chan_size) {
1117
1.26k
          continue;
1118
1.26k
        }
1119
61.2k
        if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
1120
61.2k
        size_t ch_id = stream_id == 0
1121
61.2k
                           ? i
1122
61.2k
                           : gi_channel_[stream_id][i - image.nb_meta_channels];
1123
61.2k
        uint32_t q = quants_[ch_id];
1124
        // Inform the tree splitting heuristics that each channel in each group
1125
        // used this quantization factor. This will produce a tree with the
1126
        // given multipliers.
1127
61.2k
        if (multiplier_info.empty() ||
1128
60.8k
            multiplier_info.back().range[1][0] != stream_id ||
1129
56.0k
            multiplier_info.back().multiplier != q) {
1130
39.3k
          StaticPropRange range;
1131
39.3k
          range[0] = {{i, i + 1}};
1132
39.3k
          range[1] = {{stream_id, stream_id + 1}};
1133
39.3k
          multiplier_info.push_back({range, q});
1134
39.3k
        } else {
1135
          // Previous channel in the same group had the same quantization
1136
          // factor. Don't provide two different ranges, as that creates
1137
          // unnecessary nodes.
1138
21.9k
          multiplier_info.back().range[0][1] = i + 1;
1139
21.9k
        }
1140
61.2k
      }
1141
5.29k
    }
1142
    // Merge group+channel settings that have the same channels and quantization
1143
    // factors, to avoid unnecessary nodes.
1144
436
    std::sort(multiplier_info.begin(), multiplier_info.end(),
1145
442k
              [](ModularMultiplierInfo a, ModularMultiplierInfo b) {
1146
442k
                return std::make_tuple(a.range, a.multiplier) <
1147
442k
                       std::make_tuple(b.range, b.multiplier);
1148
442k
              });
1149
436
    size_t new_num = 1;
1150
39.3k
    for (size_t i = 1; i < multiplier_info.size(); i++) {
1151
38.9k
      ModularMultiplierInfo& prev = multiplier_info[new_num - 1];
1152
38.9k
      ModularMultiplierInfo& cur = multiplier_info[i];
1153
38.9k
      if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier &&
1154
35.6k
          prev.range[1][1] == cur.range[1][0]) {
1155
35.6k
        prev.range[1][1] = cur.range[1][1];
1156
35.6k
      } else {
1157
3.31k
        multiplier_info[new_num++] = multiplier_info[i];
1158
3.31k
      }
1159
38.9k
    }
1160
436
    multiplier_info.resize(new_num);
1161
436
  }
1162
1163
1.19k
  if (!cparams_.custom_fixed_tree.empty()) {
1164
0
    tree_ = cparams_.custom_fixed_tree;
1165
1.19k
  } else if (cparams_.speed_tier < SpeedTier::kFalcon ||
1166
1.06k
             !cparams_.modular_mode) {
1167
    // Avoid creating a tree with leaves that don't correspond to any pixels.
1168
1.06k
    std::vector<size_t> useful_splits;
1169
1.06k
    useful_splits.reserve(tree_splits_.size());
1170
5.16k
    for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) {
1171
4.09k
      bool has_pixels = false;
1172
4.09k
      size_t start = tree_splits_[chunk];
1173
4.09k
      size_t stop = tree_splits_[chunk + 1];
1174
54.3k
      for (size_t i = start; i < stop; i++) {
1175
50.2k
        if (!stream_images_[i].empty()) has_pixels = true;
1176
50.2k
      }
1177
4.09k
      if (has_pixels) {
1178
1.70k
        useful_splits.push_back(tree_splits_[chunk]);
1179
1.70k
      }
1180
4.09k
    }
1181
    // Don't do anything if modular mode does not have any pixels in this image
1182
1.06k
    if (useful_splits.empty()) return true;
1183
1.06k
    useful_splits.push_back(tree_splits_.back());
1184
1185
1.06k
    std::vector<Tree> trees(useful_splits.size() - 1);
1186
1.06k
    const auto process_chunk = [&](const uint32_t chunk,
1187
1.70k
                                   size_t /* thread */) -> Status {
1188
      // TODO(veluca): parallelize more.
1189
1.70k
      uint32_t start = useful_splits[chunk];
1190
1.70k
      uint32_t stop = useful_splits[chunk + 1];
1191
3.40k
      while (start < stop && stream_images_[start].empty()) ++start;
1192
27.8k
      while (start < stop && stream_images_[stop - 1].empty()) --stop;
1193
1194
1.70k
      if (stream_options_[start].tree_kind ==
1195
1.70k
          ModularOptions::TreeKind::kLearn) {
1196
711
        JXL_ASSIGN_OR_RETURN(
1197
711
            trees[chunk],
1198
711
            LearnTree(stream_images_.data(), stream_options_.data(), start,
1199
711
                      stop, multiplier_info));
1200
990
      } else {
1201
990
        size_t total_pixels = 0;
1202
3.71k
        for (size_t i = start; i < stop; i++) {
1203
8.66k
          for (const Channel& ch : stream_images_[i].channel) {
1204
8.66k
            total_pixels += ch.w * ch.h;
1205
8.66k
          }
1206
2.72k
        }
1207
990
        total_pixels = std::max<size_t>(total_pixels, 1);
1208
1209
990
        trees[chunk] = PredefinedTree(stream_options_[start].tree_kind,
1210
990
                                      total_pixels, 8, 0);
1211
990
      }
1212
1.70k
      return true;
1213
1.70k
    };
1214
1.06k
    JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, useful_splits.size() - 1,
1215
1.06k
                                  ThreadPool::NoInit, process_chunk,
1216
1.06k
                                  "LearnTrees"));
1217
1.06k
    tree_.clear();
1218
1.06k
    JXL_RETURN_IF_ERROR(
1219
1.06k
        MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_));
1220
1.06k
  } else {
1221
    // Fixed tree.
1222
129
    size_t total_pixels = 0;
1223
129
    int max_bitdepth = 0;
1224
8.68k
    for (const Image& img : stream_images_) {
1225
8.68k
      max_bitdepth = std::max(max_bitdepth, img.bitdepth);
1226
19.5k
      for (const Channel& ch : img.channel) {
1227
19.5k
        total_pixels += ch.w * ch.h;
1228
19.5k
      }
1229
8.68k
    }
1230
129
    if (cparams_.speed_tier <= SpeedTier::kFalcon) {
1231
42
      tree_ = PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels,
1232
42
                             max_bitdepth, stream_options_[0].max_properties);
1233
87
    } else if (cparams_.speed_tier <= SpeedTier::kThunder) {
1234
87
      tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC,
1235
87
                             total_pixels, max_bitdepth,
1236
87
                             stream_options_[0].max_properties);
1237
87
    } else {
1238
0
      tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)};
1239
0
    }
1240
129
  }
1241
1.19k
  tree_tokens_.resize(1);
1242
1.19k
  tree_tokens_[0].clear();
1243
1.19k
  Tree decoded_tree;
1244
1.19k
  JXL_RETURN_IF_ERROR(TokenizeTree(tree_, tree_tokens_.data(), &decoded_tree));
1245
1.19k
  JXL_ENSURE(tree_.size() == decoded_tree.size());
1246
1.19k
  tree_ = std::move(decoded_tree);
1247
1248
  /* TODO(szabadka) Add text output callback to cparams
1249
  if (kPrintTree && WantDebugOutput(aux_out)) {
1250
    if (frame_header.dc_level > 0) {
1251
      PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" +
1252
                           std::to_string(frame_header.dc_level) + "_tree");
1253
    } else {
1254
      PrintTree(tree_, aux_out->debug_prefix + "/global_tree");
1255
    }
1256
  } */
1257
1.19k
  return true;
1258
1.19k
}
1259
1260
1.19k
Status ModularFrameEncoder::ComputeTokens(ThreadPool* pool) {
1261
1.19k
  size_t num_streams = stream_images_.size();
1262
1.19k
  stream_headers_.resize(num_streams);
1263
1.19k
  tokens_.resize(num_streams);
1264
1.19k
  image_widths_.resize(num_streams);
1265
1.19k
  const auto process_stream = [&](const uint32_t stream_id,
1266
58.5k
                                  size_t /* thread */) -> Status {
1267
58.5k
    tokens_[stream_id].clear();
1268
58.5k
    JXL_RETURN_IF_ERROR(
1269
58.5k
        ModularCompress(stream_images_[stream_id], stream_options_[stream_id],
1270
58.5k
                        stream_id, tree_, stream_headers_[stream_id],
1271
58.5k
                        tokens_[stream_id], &image_widths_[stream_id]));
1272
58.5k
    return true;
1273
58.5k
  };
1274
1.19k
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_streams, ThreadPool::NoInit,
1275
1.19k
                                process_stream, "ComputeTokens"));
1276
1.19k
  return true;
1277
1.19k
}
1278
1279
Status ModularFrameEncoder::EncodeGlobalInfo(bool streaming_mode,
1280
                                             BitWriter* writer,
1281
1.87k
                                             AuxOut* aux_out) {
1282
1.87k
  JxlMemoryManager* memory_manager = writer->memory_manager();
1283
1.87k
  bool skip_rest = false;
1284
1.87k
  JXL_RETURN_IF_ERROR(
1285
1.87k
      writer->WithMaxBits(1, LayerType::ModularTree, aux_out, [&] {
1286
        // If we are using brotli, or not using modular mode.
1287
1.87k
        if (tree_tokens_.empty() || tree_tokens_[0].empty()) {
1288
1.87k
          writer->Write(1, 0);
1289
1.87k
          skip_rest = true;
1290
1.87k
        } else {
1291
1.87k
          writer->Write(1, 1);
1292
1.87k
        }
1293
1.87k
        return true;
1294
1.87k
      }));
1295
1.87k
  if (skip_rest) return true;
1296
1297
  // Write tree
1298
1.19k
  HistogramParams params =
1299
1.19k
      HistogramParams::ForModular(cparams_, extra_dc_precision, streaming_mode);
1300
1.19k
  {
1301
1.19k
    EntropyEncodingData tree_code;
1302
1.19k
    JXL_ASSIGN_OR_RETURN(
1303
1.19k
        size_t cost, BuildAndEncodeHistograms(
1304
1.19k
                         memory_manager, params, kNumTreeContexts, tree_tokens_,
1305
1.19k
                         &tree_code, writer, LayerType::ModularTree, aux_out));
1306
1.19k
    (void)cost;
1307
1.19k
    JXL_RETURN_IF_ERROR(WriteTokens(tree_tokens_[0], tree_code, 0, writer,
1308
1.19k
                                    LayerType::ModularTree, aux_out));
1309
1.19k
  }
1310
1.19k
  params.streaming_mode = streaming_mode;
1311
1.19k
  params.add_missing_symbols = streaming_mode;
1312
1.19k
  params.image_widths = image_widths_;
1313
  // Write histograms.
1314
1.19k
  JXL_ASSIGN_OR_RETURN(
1315
1.19k
      size_t cost, BuildAndEncodeHistograms(
1316
1.19k
                       memory_manager, params, (tree_.size() + 1) / 2, tokens_,
1317
1.19k
                       &code_, writer, LayerType::ModularGlobal, aux_out));
1318
1.19k
  (void)cost;
1319
1.19k
  return true;
1320
1.19k
}
1321
1322
Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out,
1323
                                         LayerType layer,
1324
91.1k
                                         const ModularStreamId& stream) {
1325
91.1k
  size_t stream_id = stream.ID(frame_dim_);
1326
91.1k
  if (stream_images_[stream_id].channel.empty()) {
1327
44.9k
    JXL_DEBUG_V(10, "Modular stream %" PRIuS " is empty.", stream_id);
1328
44.9k
    return true;  // Image with no channels, header never gets decoded.
1329
44.9k
  }
1330
46.2k
  if (tokens_.empty()) {
1331
30.3k
    JXL_RETURN_IF_ERROR(ModularGenericCompress(
1332
30.3k
        stream_images_[stream_id], stream_options_[stream_id], *writer, aux_out,
1333
30.3k
        layer, stream_id));
1334
30.3k
  } else {
1335
15.8k
    JXL_RETURN_IF_ERROR(
1336
15.8k
        Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out));
1337
15.8k
    JXL_RETURN_IF_ERROR(
1338
15.8k
        WriteTokens(tokens_[stream_id], code_, 0, writer, layer, aux_out));
1339
15.8k
  }
1340
46.2k
  return true;
1341
46.2k
}
1342
1343
14.0k
void ModularFrameEncoder::ClearStreamData(const ModularStreamId& stream) {
1344
14.0k
  size_t stream_id = stream.ID(frame_dim_);
1345
14.0k
  Image empty_image(stream_images_[stream_id].memory_manager());
1346
14.0k
  std::swap(stream_images_[stream_id], empty_image);
1347
14.0k
}
1348
1349
1.60k
void ModularFrameEncoder::ClearModularStreamData() {
1350
10.7k
  for (const auto& group : stream_params_) {
1351
10.7k
    ClearStreamData(group.id);
1352
10.7k
  }
1353
1.60k
  stream_params_.clear();
1354
1.60k
}
1355
1356
size_t ModularFrameEncoder::ComputeStreamingAbsoluteAcGroupId(
1357
    size_t dc_group_id, size_t ac_group_id,
1358
13.0k
    const FrameDimensions& patch_dim) const {
1359
13.0k
  size_t dc_group_x = dc_group_id % frame_dim_.xsize_dc_groups;
1360
13.0k
  size_t dc_group_y = dc_group_id / frame_dim_.xsize_dc_groups;
1361
13.0k
  size_t ac_group_x = ac_group_id % patch_dim.xsize_groups;
1362
13.0k
  size_t ac_group_y = ac_group_id / patch_dim.xsize_groups;
1363
13.0k
  return (dc_group_x * 8 + ac_group_x) +
1364
13.0k
         (dc_group_y * 8 + ac_group_y) * frame_dim_.xsize_groups;
1365
13.0k
}
1366
1367
Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect,
1368
                                                const CompressParams& cparams,
1369
                                                int minShift, int maxShift,
1370
                                                const ModularStreamId& stream,
1371
58.8k
                                                bool do_color, bool groupwise) {
1372
58.8k
  size_t stream_id = stream.ID(frame_dim_);
1373
58.8k
  if (stream_id == 0 && frame_dim_.num_groups != 1) {
1374
    // If we have multiple groups, then the stream with ID 0 holds the full
1375
    // image and we do not want to apply transforms or in general change the
1376
    // pixel values.
1377
14
    return true;
1378
14
  }
1379
58.8k
  Image& full_image = stream_images_[0];
1380
58.8k
  JxlMemoryManager* memory_manager = full_image.memory_manager();
1381
58.8k
  const size_t xsize = rect.xsize();
1382
58.8k
  const size_t ysize = rect.ysize();
1383
58.8k
  Image& gi = stream_images_[stream_id];
1384
58.9k
  if (stream_id > 0) {
1385
58.9k
    JXL_ASSIGN_OR_RETURN(gi, Image::Create(memory_manager, xsize, ysize,
1386
58.9k
                                           full_image.bitdepth, 0));
1387
    // start at the first bigger-than-frame_dim.group_dim non-metachannel
1388
58.9k
    size_t c = full_image.nb_meta_channels;
1389
58.9k
    if (!groupwise) {
1390
90.9k
      for (; c < full_image.channel.size(); c++) {
1391
89.7k
        Channel& fc = full_image.channel[c];
1392
89.7k
        if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
1393
89.7k
      }
1394
48.4k
    }
1395
341k
    for (; c < full_image.channel.size(); c++) {
1396
282k
      Channel& fc = full_image.channel[c];
1397
282k
      int shift = std::min(fc.hshift, fc.vshift);
1398
282k
      if (shift > maxShift) continue;
1399
252k
      if (shift < minShift) continue;
1400
145k
      Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
1401
145k
             rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
1402
145k
      if (r.xsize() == 0 || r.ysize() == 0) continue;
1403
142k
      gi_channel_[stream_id].push_back(c);
1404
142k
      JXL_ASSIGN_OR_RETURN(
1405
142k
          Channel gc, Channel::Create(memory_manager, r.xsize(), r.ysize()));
1406
142k
      gc.hshift = fc.hshift;
1407
142k
      gc.vshift = fc.vshift;
1408
2.51M
      for (size_t y = 0; y < r.ysize(); ++y) {
1409
2.37M
        memcpy(gc.Row(y), r.ConstRow(fc.plane, y),
1410
2.37M
               r.xsize() * sizeof(pixel_type));
1411
2.37M
      }
1412
142k
      gi.channel.emplace_back(std::move(gc));
1413
142k
    }
1414
1415
58.9k
    if (gi.channel.empty()) return true;
1416
    // Do some per-group transforms
1417
1418
    // Local palette transforms
1419
    // TODO(veluca): make this work with quantize-after-prediction in lossy
1420
    // mode.
1421
39.0k
    if (cparams.butteraugli_distance == 0.f && !cparams.lossy_palette &&
1422
29.3k
        cparams.speed_tier < SpeedTier::kCheetah) {
1423
10.1k
      int max_bitdepth = 0, maxval = 0;  // don't care about that here
1424
10.1k
      float channel_color_percent = 0;
1425
10.1k
      if (!(cparams.responsive &&
1426
8.85k
            (cparams.decoding_speed_tier >= 1 || cparams.IsLossless()))) {
1427
8.85k
        channel_color_percent = cparams.channel_colors_percent;
1428
8.85k
      }
1429
10.1k
      JXL_RETURN_IF_ERROR(try_palettes(gi, max_bitdepth, maxval, cparams,
1430
10.1k
                                       channel_color_percent));
1431
10.1k
    }
1432
39.0k
  }
1433
1434
  // lossless and no specific color transform specified: try Nothing, YCoCg,
1435
  // and 17 RCTs
1436
38.9k
  if (cparams.color_transform == ColorTransform::kNone &&
1437
30.7k
      cparams.IsLossless() && cparams.colorspace < 0 &&
1438
8.69k
      gi.channel.size() - gi.nb_meta_channels >= 3 &&
1439
753
      cparams.responsive == JXL_FALSE && do_color &&
1440
754
      cparams.speed_tier <= SpeedTier::kHare) {
1441
6
    size_t nb_rcts_to_try = 0;
1442
6
    switch (cparams.speed_tier) {
1443
0
      case SpeedTier::kLightning:
1444
0
      case SpeedTier::kThunder:
1445
0
      case SpeedTier::kFalcon:
1446
0
      case SpeedTier::kCheetah:
1447
0
        nb_rcts_to_try = 0;  // Just do global YCoCg
1448
0
        break;
1449
2
      case SpeedTier::kHare:
1450
2
        nb_rcts_to_try = 4;
1451
2
        break;
1452
0
      case SpeedTier::kWombat:
1453
0
        nb_rcts_to_try = 5;
1454
0
        break;
1455
0
      case SpeedTier::kSquirrel:
1456
0
        nb_rcts_to_try = 7;
1457
0
        break;
1458
4
      case SpeedTier::kKitten:
1459
4
        nb_rcts_to_try = 9;
1460
4
        break;
1461
0
      case SpeedTier::kTectonicPlate:
1462
0
      case SpeedTier::kGlacier:
1463
0
      case SpeedTier::kTortoise:
1464
0
        nb_rcts_to_try = 19;
1465
0
        break;
1466
6
    }
1467
6
    float best_cost = std::numeric_limits<float>::max();
1468
6
    size_t best_rct = 0;
1469
6
    bool need_to_restore = (nb_rcts_to_try > 1);
1470
6
    std::vector<Channel> orig;
1471
6
    orig.reserve(3);
1472
    // These should be 19 actually different transforms; the remaining ones
1473
    // are equivalent to one of these (note that the first two are do-nothing
1474
    // and YCoCg) modulo channel reordering (which only matters in the case of
1475
    // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR)
1476
6
    for (int rct_type : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5,
1477
6
                         5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4,
1478
6
                         1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4,
1479
50
                         4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) {
1480
50
      if (nb_rcts_to_try == 0) break;
1481
44
      nb_rcts_to_try--;
1482
      // no-op rct_type; use as baseline cost
1483
44
      if (rct_type == 0) {
1484
6
        JXL_ASSIGN_OR_RETURN(best_cost, EstimateCost(gi));
1485
24
        for (size_t c = 0; c < 3; ++c) {
1486
18
          Channel& genuine = gi.channel[gi.nb_meta_channels + c];
1487
18
          JXL_ASSIGN_OR_RETURN(
1488
18
              Channel ch,
1489
18
              Channel::Create(genuine.memory_manager(), genuine.w, genuine.h,
1490
18
                              genuine.hshift, genuine.vshift));
1491
18
          orig.emplace_back(std::move(ch));
1492
18
          genuine.plane.Swap(orig[c].plane);
1493
18
        }
1494
38
      } else {
1495
38
        std::array<const Channel*, 3> in = {&orig[0], &orig[1], &orig[2]};
1496
38
        std::array<Channel*, 3> out = {&gi.channel[gi.nb_meta_channels + 0],
1497
38
                                       &gi.channel[gi.nb_meta_channels + 1],
1498
38
                                       &gi.channel[gi.nb_meta_channels + 2]};
1499
38
        JXL_RETURN_IF_ERROR(FwdRct(in, out, rct_type, /* pool */ nullptr));
1500
76
        JXL_ASSIGN_OR_RETURN(float cost, EstimateCost(gi));
1501
76
        if (cost < best_cost) {
1502
0
          best_rct = rct_type;
1503
0
          best_cost = cost;
1504
0
        }
1505
76
      }
1506
44
    }
1507
6
    if (need_to_restore) {
1508
24
      for (size_t c = 0; c < 3; ++c) {
1509
18
        gi.channel[gi.nb_meta_channels + c].plane.Swap(orig[c].plane);
1510
18
      }
1511
6
    }
1512
    // Apply the best RCT to the image for future encoding.
1513
6
    if (best_rct != 0) {
1514
0
      Transform sg(TransformId::kRCT);
1515
0
      sg.begin_c = gi.nb_meta_channels;
1516
0
      sg.rct_type = best_rct;
1517
0
      do_transform(gi, sg, weighted::Header());
1518
0
    }
1519
38.9k
  } else {
1520
    // No need to try anything, just use the default options.
1521
38.9k
  }
1522
38.9k
  size_t nb_wp_modes = 1;
1523
38.9k
  if (cparams.speed_tier <= SpeedTier::kTortoise) {
1524
1.19k
    nb_wp_modes = 5;
1525
37.7k
  } else if (cparams.speed_tier <= SpeedTier::kKitten) {
1526
2.96k
    nb_wp_modes = 2;
1527
2.96k
  }
1528
38.9k
  if (nb_wp_modes > 1 &&
1529
4.16k
      PredictorHasWeighted(stream_options_[stream_id].predictor)) {
1530
80
    float best_cost = std::numeric_limits<float>::max();
1531
80
    stream_options_[stream_id].wp_mode = 0;
1532
432
    for (size_t i = 0; i < nb_wp_modes; i++) {
1533
352
      float cost = EstimateWPCost(gi, i);
1534
352
      if (cost < best_cost) {
1535
106
        best_cost = cost;
1536
106
        stream_options_[stream_id].wp_mode = i;
1537
106
      }
1538
352
    }
1539
80
  }
1540
38.9k
  return true;
1541
38.9k
}
1542
1543
constexpr float q_deadzone = 0.62f;
1544
int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y,
1545
               size_t w, weighted::State* wp_state, float value,
1546
1.43M
               float inv_factor, bool* has_outliers) {
1547
1.43M
  float svalue = value * inv_factor;
1548
1.43M
  PredictionResult pred =
1549
1.43M
      PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state);
1550
1.43M
  svalue -= pred.guess;
1551
1.43M
  if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
1552
1.43M
  int residual = 0;
1553
1.43M
  if (svalue > static_cast<float>(std::numeric_limits<int>::max()) ||
1554
1.38M
      svalue < static_cast<float>(std::numeric_limits<int>::min())) {
1555
0
    *has_outliers = true;
1556
1.43M
  } else {
1557
1.43M
    residual = std::round(svalue);
1558
1.43M
  }
1559
1.43M
  if (residual > 2 || residual < -2) residual = std::round(svalue * 0.5f) * 2;
1560
1.43M
  return residual + pred.guess;
1561
1.43M
}
1562
1563
int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x,
1564
1.12M
                     size_t y, size_t w, float value, float inv_factor) {
1565
1.12M
  float svalue = value * inv_factor;
1566
1.12M
  PredictionResult pred =
1567
1.12M
      PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient);
1568
1.12M
  svalue -= pred.guess;
1569
1.12M
  if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
1570
1.12M
  int residual = std::round(svalue);
1571
1.12M
  if (residual > 2 || residual < -2) residual = std::round(svalue * 0.5f) * 2;
1572
1.12M
  return residual + pred.guess;
1573
1.12M
}
1574
1575
Status ModularFrameEncoder::AddVarDCTDC(const FrameHeader& frame_header,
1576
                                        const Image3F& dc, const Rect& r,
1577
                                        size_t group_index, bool nl_dc,
1578
                                        PassesEncoderState* enc_state,
1579
3.67k
                                        bool jpeg_transcode) {
1580
3.67k
  JxlMemoryManager* memory_manager = dc.memory_manager();
1581
3.67k
  extra_dc_precision[group_index] = nl_dc ? 1 : 0;
1582
3.67k
  float mul = 1 << extra_dc_precision[group_index];
1583
3.67k
  bool has_outliers = false;
1584
1585
3.67k
  size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_);
1586
3.67k
  stream_options_[stream_id].max_chan_size = 0xFFFFFF;
1587
3.67k
  stream_options_[stream_id].predictor = Predictor::Weighted;
1588
3.67k
  stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
1589
3.67k
  if (cparams_.speed_tier >= SpeedTier::kSquirrel) {
1590
2.24k
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
1591
2.24k
  }
1592
3.67k
  if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) {
1593
0
    stream_options_[stream_id].predictor =
1594
0
        (cparams_.speed_tier < SpeedTier::kKitten ? Predictor::Variable
1595
0
                                                  : Predictor::Best);
1596
0
    stream_options_[stream_id].wp_tree_mode =
1597
0
        ModularOptions::TreeMode::kDefault;
1598
0
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
1599
0
  }
1600
3.67k
  if (cparams_.decoding_speed_tier >= 1) {
1601
1.49k
    stream_options_[stream_id].tree_kind =
1602
1.49k
        ModularOptions::TreeKind::kGradientFixedDC;
1603
1.49k
  }
1604
3.67k
  stream_options_[stream_id].histogram_params =
1605
3.67k
      stream_options_[0].histogram_params;
1606
1607
3.67k
  JXL_ASSIGN_OR_RETURN(
1608
3.67k
      stream_images_[stream_id],
1609
3.67k
      Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 3));
1610
3.67k
  const ColorCorrelation& color_correlation = enc_state->shared.cmap.base();
1611
3.67k
  if (nl_dc && stream_options_[stream_id].tree_kind ==
1612
2.56k
                   ModularOptions::TreeKind::kGradientFixedDC) {
1613
1.04k
    JXL_ENSURE(frame_header.chroma_subsampling.Is444());
1614
3.13k
    for (size_t c : {1, 0, 2}) {
1615
3.13k
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1616
3.13k
      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
1617
3.13k
      float cfl_factor = color_correlation.DCFactors()[c];
1618
104k
      for (size_t y = 0; y < r.ysize(); y++) {
1619
101k
        int32_t* quant_row =
1620
101k
            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
1621
101k
        size_t stride = stream_images_[stream_id]
1622
101k
                            .channel[c < 2 ? c ^ 1 : c]
1623
101k
                            .plane.PixelsPerRow();
1624
101k
        const float* row = r.ConstPlaneRow(dc, c, y);
1625
101k
        if (c == 1) {
1626
424k
          for (size_t x = 0; x < r.xsize(); x++) {
1627
391k
            quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y,
1628
391k
                                            r.xsize(), row[x], inv_factor);
1629
391k
          }
1630
68.1k
        } else {
1631
68.1k
          int32_t* quant_row_y =
1632
68.1k
              stream_images_[stream_id].channel[0].plane.Row(y);
1633
837k
          for (size_t x = 0; x < r.xsize(); x++) {
1634
769k
            quant_row[x] = QuantizeGradient(
1635
769k
                quant_row, stride, c, x, y, r.xsize(),
1636
769k
                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
1637
769k
          }
1638
68.1k
        }
1639
101k
      }
1640
3.13k
    }
1641
2.62k
  } else if (nl_dc) {
1642
1.52k
    JXL_ENSURE(frame_header.chroma_subsampling.Is444());
1643
4.55k
    for (size_t c : {1, 0, 2}) {
1644
4.55k
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1645
4.55k
      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
1646
4.55k
      float cfl_factor = color_correlation.DCFactors()[c];
1647
4.55k
      weighted::Header header;
1648
4.55k
      weighted::State wp_state(header, r.xsize(), r.ysize());
1649
121k
      for (size_t y = 0; y < r.ysize(); y++) {
1650
116k
        int32_t* quant_row =
1651
116k
            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
1652
116k
        size_t stride = stream_images_[stream_id]
1653
116k
                            .channel[c < 2 ? c ^ 1 : c]
1654
116k
                            .plane.PixelsPerRow();
1655
116k
        const float* row = r.ConstPlaneRow(dc, c, y);
1656
116k
        if (c == 1) {
1657
526k
          for (size_t x = 0; x < r.xsize(); x++) {
1658
487k
            quant_row[x] =
1659
487k
                QuantizeWP(quant_row, stride, c, x, y, r.xsize(), &wp_state,
1660
487k
                           row[x], inv_factor, &has_outliers);
1661
487k
            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
1662
487k
          }
1663
77.8k
        } else {
1664
77.8k
          int32_t* quant_row_y =
1665
77.8k
              stream_images_[stream_id].channel[0].plane.Row(y);
1666
1.04M
          for (size_t x = 0; x < r.xsize(); x++) {
1667
971k
            quant_row[x] =
1668
971k
                QuantizeWP(quant_row, stride, c, x, y, r.xsize(), &wp_state,
1669
971k
                           row[x] - quant_row_y[x] * (y_factor * cfl_factor),
1670
971k
                           inv_factor, &has_outliers);
1671
971k
            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
1672
971k
          }
1673
77.8k
        }
1674
116k
      }
1675
4.55k
    }
1676
1.52k
  } else if (frame_header.chroma_subsampling.Is444()) {
1677
3.31k
    for (size_t c : {1, 0, 2}) {
1678
3.31k
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1679
3.31k
      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
1680
3.31k
      float cfl_factor = color_correlation.DCFactors()[c];
1681
188k
      for (size_t y = 0; y < r.ysize(); y++) {
1682
185k
        int32_t* quant_row =
1683
185k
            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
1684
185k
        const float* row = r.ConstPlaneRow(dc, c, y);
1685
185k
        if (c == 1) {
1686
2.15M
          for (size_t x = 0; x < r.xsize(); x++) {
1687
2.09M
            quant_row[x] = std::round(row[x] * inv_factor);
1688
2.09M
          }
1689
125k
        } else {
1690
125k
          int32_t* quant_row_y =
1691
125k
              stream_images_[stream_id].channel[0].plane.Row(y);
1692
4.28M
          for (size_t x = 0; x < r.xsize(); x++) {
1693
4.15M
            quant_row[x] =
1694
4.15M
                std::round((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) *
1695
4.15M
                           inv_factor);
1696
4.15M
          }
1697
125k
        }
1698
185k
      }
1699
3.31k
    }
1700
1.10k
  } else {
1701
0
    for (size_t c : {1, 0, 2}) {
1702
0
      Rect rect(r.x0() >> frame_header.chroma_subsampling.HShift(c),
1703
0
                r.y0() >> frame_header.chroma_subsampling.VShift(c),
1704
0
                r.xsize() >> frame_header.chroma_subsampling.HShift(c),
1705
0
                r.ysize() >> frame_header.chroma_subsampling.VShift(c));
1706
0
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1707
0
      size_t ys = rect.ysize();
1708
0
      size_t xs = rect.xsize();
1709
0
      Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c];
1710
0
      ch.w = xs;
1711
0
      ch.h = ys;
1712
0
      JXL_RETURN_IF_ERROR(ch.shrink());
1713
0
      for (size_t y = 0; y < ys; y++) {
1714
0
        int32_t* quant_row = ch.plane.Row(y);
1715
0
        const float* row = rect.ConstPlaneRow(dc, c, y);
1716
0
        for (size_t x = 0; x < xs; x++) {
1717
0
          quant_row[x] = std::round(row[x] * inv_factor);
1718
0
        }
1719
0
      }
1720
0
    }
1721
0
  }
1722
1723
3.67k
  if (has_outliers) {
1724
0
    return JXL_FAILURE("Unsupported range of DC values");
1725
0
  }
1726
1727
3.67k
  DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc,
1728
3.67k
            stream_images_[stream_id], enc_state->shared.quantizer.MulDC(),
1729
3.67k
            1.0 / mul, color_correlation.DCFactors(),
1730
3.67k
            frame_header.chroma_subsampling, enc_state->shared.block_ctx_map);
1731
3.67k
  return true;
1732
3.67k
}
1733
1734
Status ModularFrameEncoder::AddACMetadata(const Rect& r, size_t group_index,
1735
                                          bool jpeg_transcode,
1736
2.95k
                                          PassesEncoderState* enc_state) {
1737
2.95k
  JxlMemoryManager* memory_manager = enc_state->memory_manager();
1738
2.95k
  size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_);
1739
2.95k
  stream_options_[stream_id].max_chan_size = 0xFFFFFF;
1740
2.95k
  if (stream_options_[stream_id].predictor != Predictor::Weighted) {
1741
2.93k
    stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP;
1742
2.93k
  }
1743
2.95k
  if (jpeg_transcode) {
1744
0
    stream_options_[stream_id].tree_kind =
1745
0
        ModularOptions::TreeKind::kJpegTranscodeACMeta;
1746
2.95k
  } else if (cparams_.speed_tier >= SpeedTier::kFalcon) {
1747
1.28k
    stream_options_[stream_id].tree_kind =
1748
1.28k
        ModularOptions::TreeKind::kFalconACMeta;
1749
1.67k
  } else if (cparams_.speed_tier > SpeedTier::kKitten) {
1750
1.30k
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta;
1751
1.30k
  }
1752
  // If we are using a non-constant CfL field, and are in a slow enough mode,
1753
  // re-enable tree computation for it.
1754
2.95k
  if (cparams_.speed_tier < SpeedTier::kSquirrel &&
1755
360
      cparams_.force_cfl_jpeg_recompression) {
1756
360
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
1757
360
  }
1758
2.95k
  stream_options_[stream_id].histogram_params =
1759
2.95k
      stream_options_[0].histogram_params;
1760
  // YToX, YToB, ACS + QF, EPF
1761
2.95k
  Image& image = stream_images_[stream_id];
1762
2.95k
  JXL_ASSIGN_OR_RETURN(
1763
2.95k
      image, Image::Create(memory_manager, r.xsize(), r.ysize(), 8, 4));
1764
2.95k
  static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
1765
2.95k
  Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
1766
2.95k
  JXL_ASSIGN_OR_RETURN(
1767
2.95k
      image.channel[0],
1768
2.95k
      Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3));
1769
2.95k
  JXL_ASSIGN_OR_RETURN(
1770
2.95k
      image.channel[1],
1771
2.95k
      Channel::Create(memory_manager, cr.xsize(), cr.ysize(), 3, 3));
1772
2.95k
  JXL_ASSIGN_OR_RETURN(
1773
2.95k
      image.channel[2],
1774
2.95k
      Channel::Create(memory_manager, r.xsize() * r.ysize(), 2, 0, 0));
1775
2.95k
  JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map,
1776
2.95k
                                           Rect(image.channel[0].plane),
1777
2.95k
                                           &image.channel[0].plane));
1778
2.95k
  JXL_RETURN_IF_ERROR(ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map,
1779
2.95k
                                           Rect(image.channel[1].plane),
1780
2.95k
                                           &image.channel[1].plane));
1781
2.95k
  size_t num = 0;
1782
128k
  for (size_t y = 0; y < r.ysize(); y++) {
1783
125k
    AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y);
1784
125k
    const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y);
1785
125k
    const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y);
1786
125k
    int32_t* out_acs = image.channel[2].plane.Row(0);
1787
125k
    int32_t* out_qf = image.channel[2].plane.Row(1);
1788
125k
    int32_t* row_out_epf = image.channel[3].plane.Row(y);
1789
2.87M
    for (size_t x = 0; x < r.xsize(); x++) {
1790
2.74M
      row_out_epf[x] = row_epf[x];
1791
2.74M
      if (!row_acs[x].IsFirstBlock()) continue;
1792
2.58M
      out_acs[num] = row_acs[x].RawStrategy();
1793
2.58M
      out_qf[num] = row_qf[x] - 1;
1794
2.58M
      num++;
1795
2.58M
    }
1796
125k
  }
1797
2.95k
  image.channel[2].w = num;
1798
2.95k
  ac_metadata_size[group_index] = num;
1799
2.95k
  return true;
1800
2.95k
}
1801
1802
Status ModularFrameEncoder::EncodeQuantTable(
1803
    JxlMemoryManager* memory_manager, size_t size_x, size_t size_y,
1804
    BitWriter* writer, const QuantEncoding& encoding, size_t idx,
1805
0
    ModularFrameEncoder* modular_frame_encoder) {
1806
0
  JXL_ENSURE(encoding.qraw.qtable);
1807
0
  JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size());
1808
0
  JXL_ENSURE(idx < kNumQuantTables);
1809
0
  int* qtable = encoding.qraw.qtable->data();
1810
0
  JXL_RETURN_IF_ERROR(F16Coder::Write(encoding.qraw.qtable_den, writer));
1811
0
  if (modular_frame_encoder) {
1812
0
    JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx));
1813
0
    JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream(
1814
0
        writer, nullptr, LayerType::Header, qt));
1815
0
    return true;
1816
0
  }
1817
0
  JXL_ASSIGN_OR_RETURN(Image image,
1818
0
                       Image::Create(memory_manager, size_x, size_y, 8, 3));
1819
0
  for (size_t c = 0; c < 3; c++) {
1820
0
    for (size_t y = 0; y < size_y; y++) {
1821
0
      int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
1822
0
      for (size_t x = 0; x < size_x; x++) {
1823
0
        row[x] = qtable[c * size_x * size_y + y * size_x + x];
1824
0
      }
1825
0
    }
1826
0
  }
1827
0
  ModularOptions cfopts;
1828
0
  JXL_RETURN_IF_ERROR(ModularGenericCompress(image, cfopts, *writer));
1829
0
  return true;
1830
0
}
1831
1832
Status ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y,
1833
                                          const QuantEncoding& encoding,
1834
0
                                          size_t idx) {
1835
0
  JXL_ENSURE(idx < kNumQuantTables);
1836
0
  JXL_ASSIGN_OR_RETURN(ModularStreamId qt, ModularStreamId::QuantTable(idx));
1837
0
  size_t stream_id = qt.ID(frame_dim_);
1838
0
  JXL_ENSURE(encoding.qraw.qtable);
1839
0
  JXL_ENSURE(size_x * size_y * 3 == encoding.qraw.qtable->size());
1840
0
  int* qtable = encoding.qraw.qtable->data();
1841
0
  Image& image = stream_images_[stream_id];
1842
0
  JxlMemoryManager* memory_manager = image.memory_manager();
1843
0
  JXL_ASSIGN_OR_RETURN(image,
1844
0
                       Image::Create(memory_manager, size_x, size_y, 8, 3));
1845
0
  for (size_t c = 0; c < 3; c++) {
1846
0
    for (size_t y = 0; y < size_y; y++) {
1847
0
      int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
1848
0
      for (size_t x = 0; x < size_x; x++) {
1849
0
        row[x] = qtable[c * size_x * size_y + y * size_x + x];
1850
0
      }
1851
0
    }
1852
0
  }
1853
0
  return true;
1854
0
}
1855
}  // namespace jxl