Coverage Report

Created: 2022-08-24 06:04

/src/libjxl/lib/jxl/enc_modular.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_modular.h"
7
8
#include <stddef.h>
9
#include <stdint.h>
10
11
#include <array>
12
#include <atomic>
13
#include <limits>
14
#include <queue>
15
#include <utility>
16
#include <vector>
17
18
#include "lib/jxl/aux_out.h"
19
#include "lib/jxl/base/compiler_specific.h"
20
#include "lib/jxl/base/padded_bytes.h"
21
#include "lib/jxl/base/printf_macros.h"
22
#include "lib/jxl/base/status.h"
23
#include "lib/jxl/compressed_dc.h"
24
#include "lib/jxl/dec_ans.h"
25
#include "lib/jxl/enc_bit_writer.h"
26
#include "lib/jxl/enc_cluster.h"
27
#include "lib/jxl/enc_params.h"
28
#include "lib/jxl/enc_patch_dictionary.h"
29
#include "lib/jxl/enc_quant_weights.h"
30
#include "lib/jxl/frame_header.h"
31
#include "lib/jxl/gaborish.h"
32
#include "lib/jxl/modular/encoding/context_predict.h"
33
#include "lib/jxl/modular/encoding/enc_debug_tree.h"
34
#include "lib/jxl/modular/encoding/enc_encoding.h"
35
#include "lib/jxl/modular/encoding/encoding.h"
36
#include "lib/jxl/modular/encoding/ma_common.h"
37
#include "lib/jxl/modular/modular_image.h"
38
#include "lib/jxl/modular/options.h"
39
#include "lib/jxl/modular/transform/enc_transform.h"
40
#include "lib/jxl/toc.h"
41
42
namespace jxl {
43
44
namespace {
45
// Default quantization factors for Squeeze.
// The tables are tuned for -Q 50; other qualities simply scale the factors
// (results are rounded down and obviously cannot get below 1).

// Knob for the overall quality range: decrease for higher quality.
constexpr float squeeze_quality_factor = 0.35;

// Knob for the balance between luma (or anything non-chroma) and chroma:
// decrease for higher-quality luma.
constexpr float squeeze_luma_factor = 1.1;

constexpr float squeeze_quality_factor_xyb = 2.4f;

// One row of 16 factors per encoded XYB channel.
constexpr float squeeze_xyb_qtable[3][16] = {
    // Y
    {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28,
     0.64, 0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005},
    // X
    {1024, 512, 256, 128, 64, 32, 16, 8,
     4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5},
    // B-Y
    {2048, 1024, 512, 256, 128, 64, 32, 16,
     8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5},
};

constexpr float squeeze_luma_qtable[16] = {
    163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28,
    0.64, 0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005};

// For 8-bit input, the range of YCoCg chroma is -255..255, so this basically
// does 4:2:0 subsampling (the two most fine-grained layers get quantized
// away).
constexpr float squeeze_chroma_qtable[16] = {
    1024, 512, 256, 128, 64, 32, 16, 8,
    4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
72
73
// `cutoffs` must be sorted.
74
Tree MakeFixedTree(int property, const std::vector<int32_t>& cutoffs,
75
92
                   Predictor pred, size_t num_pixels) {
76
92
  size_t log_px = CeilLog2Nonzero(num_pixels);
77
92
  size_t min_gap = 0;
78
  // Reduce fixed tree height when encoding small images.
79
92
  if (log_px < 14) {
80
92
    min_gap = 8 * (14 - log_px);
81
92
  }
82
92
  Tree tree;
83
92
  struct NodeInfo {
84
92
    size_t begin, end, pos;
85
92
  };
86
92
  std::queue<NodeInfo> q;
87
  // Leaf IDs will be set by roundtrip decoding the tree.
88
92
  tree.push_back(PropertyDecisionNode::Leaf(pred));
89
92
  q.push(NodeInfo{0, cutoffs.size(), 0});
90
184
  while (!q.empty()) {
91
92
    NodeInfo info = q.front();
92
92
    q.pop();
93
92
    if (info.begin + min_gap >= info.end) continue;
94
0
    uint32_t split = (info.begin + info.end) / 2;
95
0
    tree[info.pos] =
96
0
        PropertyDecisionNode::Split(property, cutoffs[split], tree.size());
97
0
    q.push(NodeInfo{split + 1, info.end, tree.size()});
98
0
    tree.push_back(PropertyDecisionNode::Leaf(pred));
99
0
    q.push(NodeInfo{info.begin, split, tree.size()});
100
0
    tree.push_back(PropertyDecisionNode::Leaf(pred));
101
0
  }
102
92
  return tree;
103
92
}
104
105
184
// Returns the hard-coded decision tree for `tree_kind`.
//
// `total_pixels` selects the single-leaf variant of the kACMeta tree for
// inputs with fewer than 1024 pixels, and is forwarded to MakeFixedTree for
// the two fixed DC trees. Aborts if `tree_kind` has no predefined tree.
Tree PredefinedTree(ModularOptions::TreeKind tree_kind, size_t total_pixels) {
  using Kind = ModularOptions::TreeKind;
  using Node = PropertyDecisionNode;

  // Both fixed DC trees use the same cutoffs; only the property that is
  // tested and the predictor differ.
  const auto fixed_dc_tree = [total_pixels](int property,
                                            Predictor pred) -> Tree {
    const std::vector<int32_t> cutoffs = {
        -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15,
        -11,  -7,   -4,   -3,   -1,   0,   1,   3,   5,   7,   11,
        15,   23,   31,   47,   63,   95,  127, 191, 255, 392, 500};
    return MakeFixedTree(property, cutoffs, pred, total_pixels);
  };

  switch (tree_kind) {
    case Kind::kJpegTranscodeACMeta:
    case Kind::kTrivialTreeNoPredictor:
      // All the data is 0, so no need for a fancy tree.
      return {Node::Leaf(Predictor::Zero)};

    case Kind::kFalconACMeta:
      // All the data is 0 except the quant field. TODO(veluca): make that 0
      // too.
      return {Node::Leaf(Predictor::Left)};

    case Kind::kACMeta: {
      // Small image.
      if (total_pixels < 1024) {
        return {Node::Leaf(Predictor::Left)};
      }
      // The leading number in each comment is the node's index in the tree.
      return Tree{
          Node::Split(0, 1, 1),   // 0: c > 1
          Node::Split(0, 2, 3),   // 1: c > 2
          Node::Split(0, 0, 5),   // 2: c > 0
          Node::Split(6, 0, 21),  // 3: EPF control field (all 0 or 4), top > 0
          Node::Split(2, 0, 7),   // 4: ACS+QF, y > 0
          Node::Leaf(Predictor::Gradient),  // 5: CfL x
          Node::Leaf(Predictor::Gradient),  // 6: CfL b
          // 7: QF: split according to the left quant value.
          Node::Split(7, 5, 9),
          // 8: ACS: split in 4 segments (8x8 from 0 to 3, large square 4-5,
          // large rectangular 6-11, 8x8 12+), according to previous ACS value.
          Node::Split(7, 5, 15),
          // QF
          Node::Split(7, 11, 11),       // 9
          Node::Split(7, 3, 13),        // 10
          Node::Leaf(Predictor::Left),  // 11
          Node::Leaf(Predictor::Left),  // 12
          Node::Leaf(Predictor::Left),  // 13
          Node::Leaf(Predictor::Left),  // 14
          // ACS
          Node::Split(7, 11, 17),       // 15
          Node::Split(7, 3, 19),        // 16
          Node::Leaf(Predictor::Zero),  // 17
          Node::Leaf(Predictor::Zero),  // 18
          Node::Leaf(Predictor::Zero),  // 19
          Node::Leaf(Predictor::Zero),  // 20
          // EPF, left > 0
          Node::Split(7, 0, 23),        // 21
          Node::Split(7, 0, 25),        // 22
          Node::Leaf(Predictor::Zero),  // 23
          Node::Leaf(Predictor::Zero),  // 24
          Node::Leaf(Predictor::Zero),  // 25
          Node::Leaf(Predictor::Zero),  // 26
      };
    }

    case Kind::kWPFixedDC:
      return fixed_dc_tree(kWPProp, Predictor::Weighted);

    case Kind::kGradientFixedDC:
      return fixed_dc_tree(kGradientProp, Predictor::Gradient);

    default:
      break;
  }
  JXL_ABORT("Unreachable");
  return {};
}
181
182
// Merges the trees in `trees` using nodes that decide on stream_id, as defined
183
// by `tree_splits`.
184
void MergeTrees(const std::vector<Tree>& trees,
185
                const std::vector<size_t>& tree_splits, size_t begin,
186
276
                size_t end, Tree* tree) {
187
276
  JXL_ASSERT(trees.size() + 1 == tree_splits.size());
188
276
  JXL_ASSERT(end > begin);
189
276
  JXL_ASSERT(end <= trees.size());
190
276
  if (end == begin + 1) {
191
    // Insert the tree, adding the opportune offset to all child nodes.
192
    // This will make the leaf IDs wrong, but subsequent roundtripping will fix
193
    // them.
194
184
    size_t sz = tree->size();
195
184
    tree->insert(tree->end(), trees[begin].begin(), trees[begin].end());
196
368
    for (size_t i = sz; i < tree->size(); i++) {
197
184
      (*tree)[i].lchild += sz;
198
184
      (*tree)[i].rchild += sz;
199
184
    }
200
184
    return;
201
184
  }
202
92
  size_t mid = (begin + end) / 2;
203
92
  size_t splitval = tree_splits[mid] - 1;
204
92
  size_t cur = tree->size();
205
92
  tree->emplace_back(1 /*stream_id*/, splitval, 0, 0, Predictor::Zero, 0, 1);
206
92
  (*tree)[cur].lchild = tree->size();
207
92
  MergeTrees(trees, tree_splits, mid, end, tree);
208
92
  (*tree)[cur].rchild = tree->size();
209
92
  MergeTrees(trees, tree_splits, begin, mid, tree);
210
92
}
211
212
0
// Replaces every sample of `ch` by the nearest multiple of `q`, rounding
// halves away from zero. A no-op for q == 1.
// NOTE(review): there is no guard against q == 0 (division by zero); callers
// are presumably expected to pass q >= 1.
void QuantizeChannel(Channel& ch, const int q) {
  if (q == 1) return;
  const int half = q / 2;
  for (size_t y = 0; y < ch.plane.ysize(); y++) {
    pixel_type* row = ch.plane.Row(y);
    for (size_t x = 0; x < ch.plane.xsize(); x++) {
      const pixel_type value = row[x];
      // Round the magnitude, then restore the sign.
      row[x] = (value < 0) ? -((-value + half) / q) * q
                           : ((value + half) / q) * q;
    }
  }
}
225
226
// convert binary32 float that corresponds to custom [bits]-bit float (with
227
// [exp_bits] exponent bits) to a [bits]-bit integer representation that should
228
// fit in pixel_type
229
Status float_to_int(const float* const row_in, pixel_type* const row_out,
230
                    size_t xsize, unsigned int bits, unsigned int exp_bits,
231
0
                    bool fp, double dfactor) {
232
0
  JXL_ASSERT(sizeof(pixel_type) * 8 >= bits);
233
0
  if (!fp) {
234
0
    if (bits > 22) {
235
0
      for (size_t x = 0; x < xsize; ++x) {
236
0
        row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5);
237
0
      }
238
0
    } else {
239
0
      float factor = dfactor;
240
0
      for (size_t x = 0; x < xsize; ++x) {
241
0
        row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f);
242
0
      }
243
0
    }
244
0
    return true;
245
0
  }
246
0
  if (bits == 32 && fp) {
247
0
    JXL_ASSERT(exp_bits == 8);
248
0
    memcpy((void*)row_out, (const void*)row_in, 4 * xsize);
249
0
    return true;
250
0
  }
251
252
0
  int exp_bias = (1 << (exp_bits - 1)) - 1;
253
0
  int max_exp = (1 << exp_bits) - 1;
254
0
  uint32_t sign = (1u << (bits - 1));
255
0
  int mant_bits = bits - exp_bits - 1;
256
0
  int mant_shift = 23 - mant_bits;
257
0
  for (size_t x = 0; x < xsize; ++x) {
258
0
    uint32_t f;
259
0
    memcpy(&f, &row_in[x], 4);
260
0
    int signbit = (f >> 31);
261
0
    f &= 0x7fffffff;
262
0
    if (f == 0) {
263
0
      row_out[x] = (signbit ? sign : 0);
264
0
      continue;
265
0
    }
266
0
    int exp = (f >> 23) - 127;
267
0
    if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed");
268
0
    int mantissa = (f & 0x007fffff);
269
    // broke up the binary32 into its parts, now reassemble into
270
    // arbitrary float
271
0
    exp += exp_bias;
272
0
    if (exp < 0) {  // will become a subnormal number
273
      // add implicit leading 1 to mantissa
274
0
      mantissa |= 0x00800000;
275
0
      if (exp < -mant_bits) {
276
0
        return JXL_FAILURE(
277
0
            "Invalid float number: %g cannot be represented with %i "
278
0
            "exp_bits and %i mant_bits (exp %i)",
279
0
            row_in[x], exp_bits, mant_bits, exp);
280
0
      }
281
0
      mantissa >>= 1 - exp;
282
0
      exp = 0;
283
0
    }
284
    // exp should be representable in exp_bits, otherwise input was
285
    // invalid
286
0
    if (exp > max_exp) return JXL_FAILURE("Invalid float exponent");
287
0
    if (mantissa & ((1 << mant_shift) - 1)) {
288
0
      return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x],
289
0
                         mantissa);
290
0
    }
291
0
    mantissa >>= mant_shift;
292
0
    f = (signbit ? sign : 0);
293
0
    f |= (exp << mant_bits);
294
0
    f |= mantissa;
295
0
    row_out[x] = (pixel_type)f;
296
0
  }
297
0
  return true;
298
0
}
299
}  // namespace
300
301
// Prepares the modular encoder for one frame: takes a private copy of the
// compression parameters, resolves every setting that was left at its default
// (responsive mode, tree-learning properties, predictor), records the stream
// id boundaries in tree_splits_, and sizes the per-stream image and option
// arrays.
ModularFrameEncoder::ModularFrameEncoder(const FrameHeader& frame_header,
                                         const CompressParams& cparams_orig)
    : frame_dim_(frame_header.ToFrameDimensions()), cparams_(cparams_orig) {
  auto& options = cparams_.options;
  const size_t num_streams =
      ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes);

  // Lossless only: adjust tree mode / predictor to the requested decoding
  // speed tier. Tier 0 keeps the defaults.
  if (cparams_.IsLossless()) {
    const auto tier = cparams_.decoding_speed_tier;
    if (tier == 1) {
      options.wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
    } else if (tier == 2) {
      options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
      options.predictor = Predictor::Gradient;
    } else if (tier == 3) {
      // nb_repeats = 0 with the Gradient predictor.
      // NOTE(review): the original comment on this tier read "LZ77, no
      // Gradient", which does not match the predictor chosen here - confirm.
      options.nb_repeats = 0;
      options.predictor = Predictor::Gradient;
    } else if (tier != 0) {
      // Every other tier: LZ77, no predictor.
      options.nb_repeats = 0;
      options.predictor = Predictor::Zero;
    }
  }
  // NOTE(review): `responsive` is tested for truthiness here, before its
  // negative "unset" value is resolved further down - confirm that treating
  // "unset" as responsive at this point is intended.
  if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive &&
      cparams_.IsLossless()) {
    options.tree_kind = ModularOptions::TreeKind::kTrivialTreeNoPredictor;
    options.nb_repeats = 0;
  }
  stream_images_.resize(num_streams);

  // use a sensible default if nothing explicit is specified:
  // Squeeze for lossy, no squeeze for lossless
  if (cparams_.responsive < 0) {
    cparams_.responsive = cparams_.IsLossless() ? 0 : 1;
  }

  options.splitting_heuristics_node_threshold =
      (cparams_.speed_tier > SpeedTier::kWombat) ? 192 : 96;

  {
    // Set properties.
    // Candidate properties, in order of their likelihood of being useful: for
    // Squeeze residuals when responsive, for the non-Squeeze case otherwise.
    const std::vector<uint32_t> prop_order =
        cparams_.responsive
            ? std::vector<uint32_t>{0, 1, 4, 5, 6, 7, 8, 15,
                                    9, 10, 11, 12, 13, 14, 2, 3}
            : std::vector<uint32_t>{0, 1, 15, 9, 10, 11, 12, 13,
                                    14, 2, 3, 4, 5, 6, 7, 8};

    // How many of those properties to consider, and the cap on values per
    // property, both depending on the speed tier.
    int num_props = 6;
    int max_values = 16;
    switch (cparams_.speed_tier) {
      case SpeedTier::kSquirrel:
        num_props = 8;
        max_values = 32;
        break;
      case SpeedTier::kKitten:
        num_props = 10;
        max_values = 64;
        break;
      case SpeedTier::kTortoise:
        num_props = static_cast<int>(prop_order.size());
        max_values = 256;
        break;
      default:
        break;
    }
    options.splitting_heuristics_properties.assign(
        prop_order.begin(), prop_order.begin() + num_props);
    options.max_property_values = max_values;

    if (cparams_.speed_tier > SpeedTier::kTortoise) {
      // Gradient in previous channels.
      for (int i = 0; i < options.max_properties; i++) {
        options.splitting_heuristics_properties.push_back(
            kNumNonrefProperties + i * 4 + 3);
      }
    } else {
      // All the extra properties in Tortoise mode.
      for (int i = 0; i < options.max_properties * 4; i++) {
        options.splitting_heuristics_properties.push_back(
            kNumNonrefProperties + i);
      }
    }
  }

  if (options.predictor != static_cast<Predictor>(-1)) {
    // An explicit predictor was requested: remember it for the lossy palette
    // transform, which itself is coded with the zero predictor.
    delta_pred_ = options.predictor;
    if (cparams_.lossy_palette) options.predictor = Predictor::Zero;
  } else if ((cparams_.speed_tier <= SpeedTier::kTortoise ||
              !cparams_.modular_mode) &&
             cparams_.IsLossless() && !cparams_.responsive) {
    // no explicit predictor(s) given, set a good default
    // TODO(veluca): allow all predictors that don't break residual
    // multipliers in lossy mode.
    options.predictor = Predictor::Variable;
  } else if (cparams_.responsive || cparams_.lossy_palette) {
    // zero predictor for Squeeze residues and lossy palette
    options.predictor = Predictor::Zero;
  } else if (!cparams_.IsLossless()) {
    // If not responsive and lossy. TODO(veluca): use near_lossless instead?
    options.predictor = Predictor::Gradient;
  } else if (cparams_.speed_tier < SpeedTier::kFalcon) {
    // try median and weighted predictor for anything else
    options.predictor = Predictor::Best;
  } else if (cparams_.speed_tier == SpeedTier::kFalcon) {
    // just weighted predictor in falcon mode
    options.predictor = Predictor::Weighted;
  } else {
    // speed_tier > kFalcon: just gradient predictor in thunder mode
    options.predictor = Predictor::Gradient;
  }

  // In lossy mode these predictors are replaced by the zero predictor.
  if (!cparams_.IsLossless() &&
      (options.predictor == Predictor::Weighted ||
       options.predictor == Predictor::Variable ||
       options.predictor == Predictor::Best)) {
    options.predictor = Predictor::Zero;
  }

  // tree_splits_ holds the stream id boundaries: it starts at 0, ends at
  // num_streams and, outside modular mode, has one entry per stream category
  // in between.
  tree_splits_.push_back(0);
  if (!cparams_.modular_mode) {
    options.fast_decode_multiplier = 1.0f;
    tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_));
    tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_));
    tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_));
    tree_splits_.push_back(ModularStreamId::QuantTable(0).ID(frame_dim_));
    tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_));
    ac_metadata_size.resize(frame_dim_.num_dc_groups);
    extra_dc_precision.resize(frame_dim_.num_dc_groups);
  }
  tree_splits_.push_back(num_streams);

  options.max_chan_size = frame_dim_.group_dim;
  options.group_dim = frame_dim_.group_dim;

  // TODO(veluca): figure out how to use different predictor sets per channel.
  stream_options_.resize(num_streams, options);
}
452
453
bool do_transform(Image& image, const Transform& tr,
454
                  const weighted::Header& wp_header,
455
0
                  jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) {
456
0
  Transform t = tr;
457
0
  bool did_it = true;
458
0
  if (force_jxlart) {
459
0
    if (!t.MetaApply(image)) return false;
460
0
  } else {
461
0
    did_it = TransformForward(t, image, wp_header, pool);
462
0
  }
463
0
  if (did_it) image.transform.push_back(t);
464
0
  return did_it;
465
0
}
466
467
Status ModularFrameEncoder::ComputeEncodingData(
468
    const FrameHeader& frame_header, const ImageMetadata& metadata,
469
    Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels,
470
    PassesEncoderState* JXL_RESTRICT enc_state, const JxlCmsInterface& cms,
471
92
    ThreadPool* pool, AuxOut* aux_out, bool do_color) {
472
92
  JXL_DEBUG_V(6, "Computing modular encoding data for frame %s",
473
92
              frame_header.DebugString().c_str());
474
475
92
  if (do_color && frame_header.loop_filter.gab) {
476
0
    GaborishInverse(color, 0.9908511000000001f, pool);
477
0
  }
478
479
92
  if (do_color && metadata.bit_depth.bits_per_sample <= 16 &&
480
92
      cparams_.speed_tier < SpeedTier::kCheetah &&
481
92
      cparams_.decoding_speed_tier < 2) {
482
0
    FindBestPatchDictionary(*color, enc_state, cms, nullptr, aux_out,
483
0
                            cparams_.color_transform == ColorTransform::kXYB);
484
0
    PatchDictionaryEncoder::SubtractFrom(
485
0
        enc_state->shared.image_features.patches, color);
486
0
  }
487
488
  // Convert ImageBundle to modular Image object
489
92
  const size_t xsize = frame_dim_.xsize;
490
92
  const size_t ysize = frame_dim_.ysize;
491
492
92
  int nb_chans = 3;
493
92
  if (metadata.color_encoding.IsGray() &&
494
92
      cparams_.color_transform == ColorTransform::kNone) {
495
0
    nb_chans = 1;
496
0
  }
497
92
  if (!do_color) nb_chans = 0;
498
499
92
  nb_chans += extra_channels.size();
500
501
92
  bool fp = metadata.bit_depth.floating_point_sample &&
502
92
            cparams_.color_transform != ColorTransform::kXYB;
503
504
  // bits_per_sample is just metadata for XYB images.
505
92
  if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
506
92
      cparams_.color_transform != ColorTransform::kXYB) {
507
0
    if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
508
0
      return JXL_FAILURE("uint32_t not supported in enc_modular");
509
0
    } else if (metadata.bit_depth.bits_per_sample > 32) {
510
0
      return JXL_FAILURE("bits_per_sample > 32 not supported");
511
0
    }
512
0
  }
513
514
  // in the non-float case, there is an implicit 0 sign bit
515
92
  int max_bitdepth =
516
92
      do_color ? metadata.bit_depth.bits_per_sample + (fp ? 0 : 1) : 0;
517
92
  Image& gi = stream_images_[0];
518
92
  gi = Image(xsize, ysize, metadata.bit_depth.bits_per_sample, nb_chans);
519
92
  int c = 0;
520
92
  if (cparams_.color_transform == ColorTransform::kXYB &&
521
92
      cparams_.modular_mode == true) {
522
0
    float enc_factors[3] = {32768.0f, 2048.0f, 2048.0f};
523
0
    if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) {
524
      // quantize XYB here and then treat it as a lossless image
525
0
      enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance);
526
0
      enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
527
0
      enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
528
0
      cparams_.butteraugli_distance = 0;
529
0
    }
530
0
    if (cparams_.manual_xyb_factors.size() == 3) {
531
0
      DequantMatricesSetCustomDC(&enc_state->shared.matrices,
532
0
                                 cparams_.manual_xyb_factors.data());
533
      // TODO(jon): update max_bitdepth in this case
534
0
    } else {
535
0
      DequantMatricesSetCustomDC(&enc_state->shared.matrices, enc_factors);
536
0
      max_bitdepth = 12;
537
0
    }
538
0
  }
539
92
  pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0;
540
92
  if (do_color) {
541
0
    for (; c < 3; c++) {
542
0
      if (metadata.color_encoding.IsGray() &&
543
0
          cparams_.color_transform == ColorTransform::kNone &&
544
0
          c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0))
545
0
        continue;
546
0
      int c_out = c;
547
      // XYB is encoded as YX(B-Y)
548
0
      if (cparams_.color_transform == ColorTransform::kXYB && c < 2)
549
0
        c_out = 1 - c_out;
550
0
      double factor = maxval;
551
0
      if (cparams_.color_transform == ColorTransform::kXYB)
552
0
        factor = enc_state->shared.matrices.InvDCQuant(c);
553
0
      if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) {
554
0
        JXL_ASSERT(!fp);
555
0
        for (size_t y = 0; y < ysize; ++y) {
556
0
          const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
557
0
          pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
558
0
          pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y);
559
0
          for (size_t x = 0; x < xsize; ++x) {
560
0
            row_out[x] = row_in[x] * factor + 0.5f;
561
0
            row_out[x] -= row_Y[x];
562
            // zero the lsb of B
563
0
            row_out[x] = row_out[x] / 2 * 2;
564
0
          }
565
0
        }
566
0
      } else {
567
0
        int bits = metadata.bit_depth.bits_per_sample;
568
0
        int exp_bits = metadata.bit_depth.exponent_bits_per_sample;
569
0
        gi.channel[c_out].hshift =
570
0
            enc_state->shared.frame_header.chroma_subsampling.HShift(c);
571
0
        gi.channel[c_out].vshift =
572
0
            enc_state->shared.frame_header.chroma_subsampling.VShift(c);
573
0
        size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift);
574
0
        size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift);
575
0
        gi.channel[c_out].shrink(xsize_shifted, ysize_shifted);
576
0
        std::atomic<bool> has_error{false};
577
0
        JXL_RETURN_IF_ERROR(RunOnPool(
578
0
            pool, 0, ysize_shifted, ThreadPool::NoInit,
579
0
            [&](const int task, const int thread) {
580
0
              const size_t y = task;
581
0
              const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
582
0
              pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
583
0
              if (!float_to_int(row_in, row_out, xsize_shifted, bits, exp_bits,
584
0
                                fp, factor)) {
585
0
                has_error = true;
586
0
              };
587
0
            },
588
0
            "float2int"));
589
0
        if (has_error) {
590
0
          return JXL_FAILURE("Error in float to integer conversion");
591
0
        }
592
0
      }
593
0
    }
594
0
    if (metadata.color_encoding.IsGray() &&
595
0
        cparams_.color_transform == ColorTransform::kNone)
596
0
      c = 1;
597
0
  }
598
599
92
  for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) {
600
0
    const ExtraChannelInfo& eci = metadata.extra_channel_info[ec];
601
0
    size_t ecups = frame_header.extra_channel_upsampling[ec];
602
0
    gi.channel[c].shrink(DivCeil(frame_dim_.xsize_upsampled, ecups),
603
0
                         DivCeil(frame_dim_.ysize_upsampled, ecups));
604
0
    gi.channel[c].hshift = gi.channel[c].vshift =
605
0
        CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
606
607
0
    int bits = eci.bit_depth.bits_per_sample;
608
0
    int exp_bits = eci.bit_depth.exponent_bits_per_sample;
609
0
    bool fp = eci.bit_depth.floating_point_sample;
610
0
    double factor = (fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1));
611
0
    if (bits + (fp ? 0 : 1) > max_bitdepth) max_bitdepth = bits + (fp ? 0 : 1);
612
0
    std::atomic<bool> has_error{false};
613
0
    JXL_RETURN_IF_ERROR(RunOnPool(
614
0
        pool, 0, gi.channel[c].plane.ysize(), ThreadPool::NoInit,
615
0
        [&](const int task, const int thread) {
616
0
          const size_t y = task;
617
0
          const float* const JXL_RESTRICT row_in = extra_channels[ec].Row(y);
618
0
          pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y);
619
0
          if (!float_to_int(row_in, row_out, gi.channel[c].plane.xsize(), bits,
620
0
                            exp_bits, fp, factor)) {
621
0
            has_error = true;
622
0
          };
623
0
        },
624
0
        "float2int"));
625
0
    if (has_error) return JXL_FAILURE("Error in float to integer conversion");
626
0
  }
627
92
  JXL_ASSERT(c == nb_chans);
628
629
92
  int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32);
630
92
  if (max_bitdepth > level_max_bitdepth)
631
0
    return JXL_FAILURE(
632
92
        "Bitdepth too high for level %i (need %i bits, have only %i in this "
633
92
        "level)",
634
92
        cparams_.level, max_bitdepth, level_max_bitdepth);
635
636
  // Set options and apply transformations
637
638
92
  if (cparams_.butteraugli_distance > 0) {
639
92
    if (cparams_.palette_colors != 0) {
640
92
      JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms");
641
92
    }
642
92
    if (cparams_.color_transform == ColorTransform::kXYB) {
643
92
      cparams_.channel_colors_pre_transform_percent = 0;
644
92
    }
645
92
    cparams_.channel_colors_percent = 0;
646
92
    cparams_.palette_colors = 0;
647
92
    cparams_.lossy_palette = false;
648
92
  }
649
650
  // if few colors, do all-channel palette before trying channel palette
651
  // Logic is as follows:
652
  // - if you can make a palette with few colors (arbitrary threshold: 200),
653
  //   then you can also make channel palettes, but they will just be extra
654
  //   signaling cost for almost no benefit
655
  // - if the palette needs more colors, then channel palette might help to
656
  //   reduce palette signaling cost
657
92
  if (cparams_.palette_colors != 0 &&
658
92
      cparams_.speed_tier < SpeedTier::kFalcon) {
659
    // all-channel palette (e.g. RGBA)
660
0
    if (gi.channel.size() > 1) {
661
0
      Transform maybe_palette(TransformId::kPalette);
662
0
      maybe_palette.begin_c = gi.nb_meta_channels;
663
0
      maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
664
0
      maybe_palette.nb_colors =
665
0
          std::min(std::min(200, (int)(xsize * ysize / 8)),
666
0
                   std::abs(cparams_.palette_colors) / 16);
667
0
      maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
668
0
      maybe_palette.lossy_palette = false;
669
0
      do_transform(gi, maybe_palette, weighted::Header(), pool);
670
0
    }
671
0
  }
672
673
  // Global channel palette
674
92
  if (cparams_.channel_colors_pre_transform_percent > 0 &&
675
92
      !cparams_.lossy_palette &&
676
92
      (cparams_.speed_tier <= SpeedTier::kThunder ||
677
0
       (do_color && metadata.bit_depth.bits_per_sample > 8))) {
678
    // single channel palette (like FLIF's ChannelCompact)
679
0
    size_t nb_channels = gi.channel.size() - gi.nb_meta_channels;
680
0
    int orig_bitdepth = max_bitdepth;
681
0
    max_bitdepth = 0;
682
0
    for (size_t i = 0; i < nb_channels; i++) {
683
0
      int32_t min, max;
684
0
      compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
685
0
      int64_t colors = max - min + 1;
686
0
      JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max);
687
0
      Transform maybe_palette_1(TransformId::kPalette);
688
0
      maybe_palette_1.begin_c = i + gi.nb_meta_channels;
689
0
      maybe_palette_1.num_c = 1;
690
      // simple heuristic: if less than X percent of the values in the range
691
      // actually occur, it is probably worth it to do a compaction
692
      // (but only if the channel palette is less than 6% the size of the
693
      // image itself)
694
0
      maybe_palette_1.nb_colors = std::min(
695
0
          (int)(xsize * ysize / 16),
696
0
          (int)(cparams_.channel_colors_pre_transform_percent / 100. * colors));
697
0
      if (do_transform(gi, maybe_palette_1, weighted::Header(), pool)) {
698
        // effective bit depth is lower, adjust quantization accordingly
699
0
        compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
700
0
        if (max < maxval) maxval = max;
701
0
        int ch_bitdepth =
702
0
            (max > 0 ? CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0);
703
0
        if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth;
704
0
      } else
705
0
        max_bitdepth = orig_bitdepth;
706
0
    }
707
0
  }
708
709
  // Global palette
710
92
  if ((cparams_.palette_colors != 0 || cparams_.lossy_palette) &&
711
92
      cparams_.speed_tier < SpeedTier::kFalcon) {
712
    // all-channel palette (e.g. RGBA)
713
0
    if (gi.channel.size() - gi.nb_meta_channels > 1) {
714
0
      Transform maybe_palette(TransformId::kPalette);
715
0
      maybe_palette.begin_c = gi.nb_meta_channels;
716
0
      maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
717
0
      maybe_palette.nb_colors =
718
0
          std::min((int)(xsize * ysize / 8), std::abs(cparams_.palette_colors));
719
0
      maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
720
0
      maybe_palette.lossy_palette =
721
0
          (cparams_.lossy_palette && maybe_palette.num_c == 3);
722
0
      if (maybe_palette.lossy_palette) {
723
0
        maybe_palette.predictor = delta_pred_;
724
0
      }
725
      // TODO(veluca): use a custom weighted header if using the weighted
726
      // predictor.
727
0
      do_transform(gi, maybe_palette, weighted::Header(), pool,
728
0
                   cparams_.options.zero_tokens);
729
0
    }
730
    // all-minus-one-channel palette (RGB with separate alpha, or CMY with
731
    // separate K)
732
0
    if (gi.channel.size() - gi.nb_meta_channels > 3) {
733
0
      Transform maybe_palette_3(TransformId::kPalette);
734
0
      maybe_palette_3.begin_c = gi.nb_meta_channels;
735
0
      maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1;
736
0
      maybe_palette_3.nb_colors =
737
0
          std::min((int)(xsize * ysize / 8), std::abs(cparams_.palette_colors));
738
0
      maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0;
739
0
      maybe_palette_3.lossy_palette = cparams_.lossy_palette;
740
0
      if (maybe_palette_3.lossy_palette) {
741
0
        maybe_palette_3.predictor = delta_pred_;
742
0
      }
743
0
      do_transform(gi, maybe_palette_3, weighted::Header(), pool,
744
0
                   cparams_.options.zero_tokens);
745
0
    }
746
0
  }
747
748
  // don't do an RCT if we're short on bits
749
92
  if (cparams_.color_transform == ColorTransform::kNone && do_color &&
750
92
      gi.channel.size() - gi.nb_meta_channels >= 3 &&
751
92
      max_bitdepth + 1 < level_max_bitdepth) {
752
0
    if (cparams_.colorspace < 0 &&
753
0
        (!cparams_.IsLossless() || cparams_.speed_tier > SpeedTier::kHare)) {
754
0
      Transform ycocg{TransformId::kRCT};
755
0
      ycocg.rct_type = 6;
756
0
      ycocg.begin_c = gi.nb_meta_channels;
757
0
      do_transform(gi, ycocg, weighted::Header(), pool);
758
0
      max_bitdepth++;
759
0
    } else if (cparams_.colorspace > 0) {
760
0
      Transform sg(TransformId::kRCT);
761
0
      sg.begin_c = gi.nb_meta_channels;
762
0
      sg.rct_type = cparams_.colorspace;
763
0
      do_transform(gi, sg, weighted::Header(), pool);
764
0
      max_bitdepth++;
765
0
    }
766
0
  }
767
768
  // don't do squeeze if we don't have some spare bits
769
92
  if (cparams_.responsive && !gi.channel.empty() &&
770
92
      max_bitdepth + 2 < level_max_bitdepth) {
771
0
    Transform t(TransformId::kSqueeze);
772
0
    t.squeezes = cparams_.squeezes;
773
0
    do_transform(gi, t, weighted::Header(), pool);
774
0
    max_bitdepth += 2;
775
0
  }
776
777
92
  if (max_bitdepth + 1 > level_max_bitdepth) {
778
    // force no group RCTs if we don't have a spare bit
779
0
    cparams_.colorspace = 0;
780
0
  }
781
92
  JXL_ASSERT(max_bitdepth <= level_max_bitdepth);
782
783
92
  std::vector<uint32_t> quants;
784
785
92
  if (cparams_.butteraugli_distance > 0) {
786
92
    quants.resize(gi.channel.size(), 1);
787
92
    float quality = 0.25f * cparams_.butteraugli_distance;
788
92
    JXL_DEBUG_V(2,
789
92
                "Adding quantization constants corresponding to distance %.3f ",
790
92
                quality);
791
92
    if (!cparams_.responsive) {
792
0
      JXL_DEBUG_V(1,
793
0
                  "Warning: lossy compression without Squeeze "
794
0
                  "transform is just color quantization.");
795
0
      quality *= 0.1f;
796
0
    }
797
92
    if (cparams_.color_transform != ColorTransform::kXYB) {
798
0
      quality *= maxval / 255.f;
799
0
    }
800
92
    if (cparams_.options.nb_repeats == 0) {
801
0
      return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!");
802
0
    }
803
92
    for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) {
804
0
      Channel& ch = gi.channel[i];
805
0
      int shift = ch.hshift + ch.vshift;  // number of pixel halvings
806
0
      if (shift > 16) shift = 16;
807
0
      if (shift > 0) shift--;
808
0
      int q;
809
      // assuming default Squeeze here
810
0
      int component =
811
0
          (do_color ? 0 : 3) + ((i - gi.nb_meta_channels) % nb_chans);
812
      // last 4 channels are final chroma residuals
813
0
      if (nb_chans > 2 && i >= gi.channel.size() - 4 && cparams_.responsive) {
814
0
        component = 1;
815
0
      }
816
0
      if (cparams_.color_transform == ColorTransform::kXYB && component < 3) {
817
0
        q = quality * squeeze_quality_factor_xyb *
818
0
            squeeze_xyb_qtable[component][shift];
819
0
      } else {
820
0
        if (cparams_.colorspace != 0 && component > 0 && component < 3) {
821
0
          q = quality * squeeze_quality_factor * squeeze_chroma_qtable[shift];
822
0
        } else {
823
0
          q = quality * squeeze_quality_factor * squeeze_luma_factor *
824
0
              squeeze_luma_qtable[shift];
825
0
        }
826
0
      }
827
0
      if (q < 1) q = 1;
828
0
      QuantizeChannel(gi.channel[i], q);
829
0
      quants[i] = q;
830
0
    }
831
92
  }
832
833
  // Fill other groups.
834
92
  struct GroupParams {
835
92
    Rect rect;
836
92
    int minShift;
837
92
    int maxShift;
838
92
    ModularStreamId id;
839
92
  };
840
92
  std::vector<GroupParams> stream_params;
841
842
92
  stream_options_[0] = cparams_.options;
843
844
  // DC
845
184
  for (size_t group_id = 0; group_id < frame_dim_.num_dc_groups; group_id++) {
846
92
    const size_t gx = group_id % frame_dim_.xsize_dc_groups;
847
92
    const size_t gy = group_id / frame_dim_.xsize_dc_groups;
848
92
    const Rect rect(gx * frame_dim_.dc_group_dim, gy * frame_dim_.dc_group_dim,
849
92
                    frame_dim_.dc_group_dim, frame_dim_.dc_group_dim);
850
    // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim
851
    // maxShift==1000 is infinity
852
92
    stream_params.push_back(
853
92
        GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(group_id)});
854
92
  }
855
  // AC global -> nothing.
856
  // AC
857
184
  for (size_t group_id = 0; group_id < frame_dim_.num_groups; group_id++) {
858
92
    const size_t gx = group_id % frame_dim_.xsize_groups;
859
92
    const size_t gy = group_id / frame_dim_.xsize_groups;
860
92
    const Rect mrect(gx * frame_dim_.group_dim, gy * frame_dim_.group_dim,
861
92
                     frame_dim_.group_dim, frame_dim_.group_dim);
862
184
    for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses();
863
92
         i++) {
864
92
      int maxShift, minShift;
865
92
      frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift);
866
92
      stream_params.push_back(GroupParams{
867
92
          mrect, minShift, maxShift, ModularStreamId::ModularAC(group_id, i)});
868
92
    }
869
92
  }
870
  // if there's only one group, everything ends up in GlobalModular
871
  // in that case, also try RCTs/WP params for the one group
872
92
  if (stream_params.size() == 2) {
873
92
    stream_params.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000,
874
92
                                        ModularStreamId::Global()});
875
92
  }
876
92
  gi_channel_.resize(stream_images_.size());
877
878
92
  JXL_RETURN_IF_ERROR(RunOnPool(
879
92
      pool, 0, stream_params.size(), ThreadPool::NoInit,
880
92
      [&](const uint32_t i, size_t /* thread */) {
881
92
        stream_options_[stream_params[i].id.ID(frame_dim_)] = cparams_.options;
882
92
        JXL_CHECK(PrepareStreamParams(
883
92
            stream_params[i].rect, cparams_, stream_params[i].minShift,
884
92
            stream_params[i].maxShift, stream_params[i].id, do_color));
885
92
      },
886
92
      "ChooseParams"));
887
92
  {
888
    // Clear out channels that have been copied to groups.
889
92
    Image& full_image = stream_images_[0];
890
92
    size_t c = full_image.nb_meta_channels;
891
92
    for (; c < full_image.channel.size(); c++) {
892
0
      Channel& fc = full_image.channel[c];
893
0
      if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
894
0
    }
895
92
    for (; c < full_image.channel.size(); c++) {
896
0
      full_image.channel[c].plane = ImageI();
897
0
    }
898
92
  }
899
900
92
  if (!quants.empty()) {
901
0
    for (uint32_t stream_id = 0; stream_id < stream_images_.size();
902
0
         stream_id++) {
903
      // skip non-modular stream_ids
904
0
      if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
905
0
      const Image& image = stream_images_[stream_id];
906
0
      const ModularOptions& options = stream_options_[stream_id];
907
0
      for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) {
908
0
        if (i >= image.nb_meta_channels &&
909
0
            (image.channel[i].w > options.max_chan_size ||
910
0
             image.channel[i].h > options.max_chan_size)) {
911
0
          continue;
912
0
        }
913
0
        if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
914
0
        size_t ch_id = stream_id == 0
915
0
                           ? i
916
0
                           : gi_channel_[stream_id][i - image.nb_meta_channels];
917
0
        uint32_t q = quants[ch_id];
918
        // Inform the tree splitting heuristics that each channel in each group
919
        // used this quantization factor. This will produce a tree with the
920
        // given multipliers.
921
0
        if (multiplier_info_.empty() ||
922
0
            multiplier_info_.back().range[1][0] != stream_id ||
923
0
            multiplier_info_.back().multiplier != q) {
924
0
          StaticPropRange range;
925
0
          range[0] = {{i, i + 1}};
926
0
          range[1] = {{stream_id, stream_id + 1}};
927
0
          multiplier_info_.push_back({range, (uint32_t)q});
928
0
        } else {
929
          // Previous channel in the same group had the same quantization
930
          // factor. Don't provide two different ranges, as that creates
931
          // unnecessary nodes.
932
0
          multiplier_info_.back().range[0][1] = i + 1;
933
0
        }
934
0
      }
935
0
    }
936
    // Merge group+channel settings that have the same channels and quantization
937
    // factors, to avoid unnecessary nodes.
938
0
    std::sort(multiplier_info_.begin(), multiplier_info_.end(),
939
0
              [](ModularMultiplierInfo a, ModularMultiplierInfo b) {
940
0
                return std::make_tuple(a.range, a.multiplier) <
941
0
                       std::make_tuple(b.range, b.multiplier);
942
0
              });
943
0
    size_t new_num = 1;
944
0
    for (size_t i = 1; i < multiplier_info_.size(); i++) {
945
0
      ModularMultiplierInfo& prev = multiplier_info_[new_num - 1];
946
0
      ModularMultiplierInfo& cur = multiplier_info_[i];
947
0
      if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier &&
948
0
          prev.range[1][1] == cur.range[1][0]) {
949
0
        prev.range[1][1] = cur.range[1][1];
950
0
      } else {
951
0
        multiplier_info_[new_num++] = multiplier_info_[i];
952
0
      }
953
0
    }
954
0
    multiplier_info_.resize(new_num);
955
0
  }
956
957
92
  JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0]));
958
959
92
  return PrepareEncoding(frame_header, pool, enc_state->heuristics.get(),
960
92
                         aux_out);
961
92
}
962
963
// Obtains the decision tree shared by the modular streams (custom fixed tree,
// per-chunk predefined/learned trees merged together, or a single predefined
// tree), tokenizes that tree, and then tokenizes every stream against it.
//
// Returns true without doing anything when `tree_` is already populated, or
// when none of the tree chunks contains a non-empty stream image. Returns a
// failure when a pool run fails or when the predictor/property setup of a
// learned tree is rejected.
Status ModularFrameEncoder::PrepareEncoding(const FrameHeader& frame_header,
                                            ThreadPool* pool,
                                            EncoderHeuristics* heuristics,
                                            AuxOut* aux_out) {
  if (!tree_.empty()) return true;

  const size_t num_streams = stream_images_.size();
  stream_headers_.resize(num_streams);
  tokens_.resize(num_streams);

  // Step 1: compute the tree, unless the heuristics supplied a fixed one.
  if (!heuristics->CustomFixedTreeLossless(frame_dim_, &tree_)) {
    const bool per_chunk_trees =
        cparams_.speed_tier < SpeedTier::kFalcon || !cparams_.modular_mode;
    if (per_chunk_trees) {
      // Keep only the chunks that contain at least one stream with pixels, so
      // that the tree has no leaves that correspond to nothing.
      std::vector<size_t> useful_splits;
      useful_splits.reserve(tree_splits_.size());
      for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) {
        const size_t first = tree_splits_[chunk];
        const size_t last = tree_splits_[chunk + 1];
        bool has_pixels = false;
        for (size_t s = first; s < last; s++) {
          has_pixels |= !stream_images_[s].empty();
        }
        if (has_pixels) {
          useful_splits.push_back(first);
        }
      }
      // Modular mode has no pixels at all in this image: nothing to prepare.
      if (useful_splits.empty()) return true;
      useful_splits.push_back(tree_splits_.back());

      const size_t num_chunks = useful_splits.size() - 1;
      std::vector<Tree> trees(num_chunks);
      // Raised by any worker whose predictor/property configuration is
      // rejected; inspected once all workers are done.
      std::atomic_flag invalid_force_wp = ATOMIC_FLAG_INIT;

      const auto build_chunk_tree = [&](const uint32_t chunk,
                                        size_t /* thread */) {
        // TODO(veluca): parallelize more.
        size_t total_pixels = 0;
        uint32_t first = useful_splits[chunk];
        uint32_t last = useful_splits[chunk + 1];
        // Trim empty streams from both ends of the chunk.
        while (first < last && stream_images_[first].empty()) ++first;
        while (first < last && stream_images_[last - 1].empty()) --last;

        const ModularOptions& chunk_options = stream_options_[first];

        // Not learning: use the predefined tree of the requested kind.
        if (chunk_options.tree_kind != ModularOptions::TreeKind::kLearn) {
          for (size_t s = first; s < last; s++) {
            for (const Channel& ch : stream_images_[s].channel) {
              total_pixels += ch.w * ch.h;
            }
          }
          trees[chunk] = PredefinedTree(chunk_options.tree_kind, total_pixels);
          return;
        }

        TreeSamples tree_samples;
        if (!tree_samples.SetPredictor(chunk_options.predictor,
                                       chunk_options.wp_tree_mode) ||
            !tree_samples.SetProperties(
                chunk_options.splitting_heuristics_properties,
                chunk_options.wp_tree_mode)) {
          invalid_force_wp.test_and_set(std::memory_order_acq_rel);
          return;
        }

        std::vector<pixel_type> pixel_samples;
        std::vector<pixel_type> diff_samples;
        std::vector<uint32_t> group_pixel_count;
        std::vector<uint32_t> channel_pixel_count;
        uint32_t max_c = 0;
        for (size_t s = first; s < last; s++) {
          max_c = std::max<uint32_t>(stream_images_[s].channel.size(), max_c);
          CollectPixelSamples(stream_images_[s], stream_options_[s], s,
                              group_pixel_count, channel_pixel_count,
                              pixel_samples, diff_samples);
        }

        StaticPropRange range;
        range[0] = {{0, max_c}};
        range[1] = {{first, last}};
        // Work on a per-worker copy of the multiplier info.
        auto local_multiplier_info = multiplier_info_;

        tree_samples.PreQuantizeProperties(
            range, local_multiplier_info, group_pixel_count,
            channel_pixel_count, pixel_samples, diff_samples,
            chunk_options.max_property_values);

        // Gather the samples for tree learning (no writer, no tokens).
        for (size_t s = first; s < last; s++) {
          JXL_CHECK(ModularGenericCompress(
              stream_images_[s], stream_options_[s], /*writer=*/nullptr,
              /*aux_out=*/nullptr, 0, s, &tree_samples, &total_pixels));
        }

        // TODO(veluca): parallelize more.
        trees[chunk] = LearnTree(std::move(tree_samples), total_pixels,
                                 chunk_options, local_multiplier_info, range);
      };

      JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_chunks, ThreadPool::NoInit,
                                    build_chunk_tree, "LearnTrees"));
      if (invalid_force_wp.test_and_set(std::memory_order_acq_rel)) {
        return JXL_FAILURE("PrepareEncoding: force_no_wp with {Weighted}");
      }
      tree_.clear();
      MergeTrees(trees, useful_splits, 0, num_chunks, &tree_);
    } else {
      // Fixed tree, selected by speed tier.
      size_t total_pixels = 0;
      for (const Image& image : stream_images_) {
        for (const Channel& ch : image.channel) {
          total_pixels += ch.w * ch.h;
        }
      }
      if (cparams_.speed_tier <= SpeedTier::kFalcon) {
        tree_ =
            PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels);
      } else if (cparams_.speed_tier <= SpeedTier::kThunder) {
        tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC,
                               total_pixels);
      } else {
        tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)};
      }
    }
  }

  // Step 2: tokenize the tree and continue with the tree that TokenizeTree
  // hands back.
  tree_tokens_.resize(1);
  auto& tree_stream = tree_tokens_[0];
  tree_stream.clear();
  Tree decoded_tree;
  TokenizeTree(tree_, &tree_stream, &decoded_tree);
  JXL_ASSERT(tree_.size() == decoded_tree.size());
  tree_ = std::move(decoded_tree);

  if (WantDebugOutput(aux_out)) {
    std::string tree_name = "/global_tree";
    if (frame_header.dc_level > 0) {
      tree_name =
          "/dc_frame_level" + std::to_string(frame_header.dc_level) + "_tree";
    }
    PrintTree(tree_, aux_out->debug_prefix + tree_name);
  }

  // Step 3: tokenize every stream using the final tree.
  image_widths_.resize(num_streams);
  const auto tokenize_stream = [&](const uint32_t stream_id,
                                   size_t /* thread */) {
    // Each worker gets its own AuxOut carrying only the debug settings.
    AuxOut my_aux_out;
    if (aux_out) {
      my_aux_out.dump_image = aux_out->dump_image;
      my_aux_out.debug_prefix = aux_out->debug_prefix;
    }
    tokens_[stream_id].clear();
    JXL_CHECK(ModularGenericCompress(
        stream_images_[stream_id], stream_options_[stream_id],
        /*writer=*/nullptr, &my_aux_out, 0, stream_id,
        /*tree_samples=*/nullptr,
        /*total_pixels=*/nullptr,
        /*tree=*/&tree_, /*header=*/&stream_headers_[stream_id],
        /*tokens=*/&tokens_[stream_id],
        /*widths=*/&image_widths_[stream_id]));
  };
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_streams, ThreadPool::NoInit,
                                tokenize_stream, "ComputeTokens"));
  return true;
}
1124
1125
// Writes the global modular section: a single flag bit saying whether a tree
// follows and, if so, the tree histograms and tokens followed by the
// histograms for the stream tokens. The histogram parameters are derived from
// the encoder speed tier and the requested decoding speed tier.
Status ModularFrameEncoder::EncodeGlobalInfo(BitWriter* writer,
                                             AuxOut* aux_out) {
  BitWriter::Allotment allotment(writer, 1);
  // No tree tokens: we are using brotli, or not using modular mode.
  const bool has_tree = !tree_tokens_.empty() && !tree_tokens_[0].empty();
  writer->Write(1, has_tree ? 1 : 0);
  ReclaimAndCharge(writer, &allotment, kLayerModularTree, aux_out);
  if (!has_tree) return true;

  // Write tree
  HistogramParams params;
  const auto speed = cparams_.speed_tier;
  if (speed > SpeedTier::kKitten) {
    params.clustering = HistogramParams::ClusteringType::kFast;

    if (speed > SpeedTier::kThunder) {
      params.ans_histogram_strategy =
          HistogramParams::ANSHistogramStrategy::kFast;
    } else {
      params.ans_histogram_strategy =
          HistogramParams::ANSHistogramStrategy::kApproximate;
    }

    if (cparams_.decoding_speed_tier >= 3 && cparams_.modular_mode) {
      if (speed >= SpeedTier::kFalcon) {
        params.lz77_method = HistogramParams::LZ77Method::kRLE;
      } else {
        params.lz77_method = HistogramParams::LZ77Method::kLZ77;
      }
    } else {
      params.lz77_method = HistogramParams::LZ77Method::kNone;
    }

    // Near-lossless DC, as well as modular mode, require choosing hybrid uint
    // more carefully.
    const bool nonzero_dc_precision =
        !extra_dc_precision.empty() && extra_dc_precision[0] != 0;
    const bool slower_modular =
        cparams_.modular_mode && speed < SpeedTier::kCheetah;
    if (nonzero_dc_precision || slower_modular) {
      params.uint_method = HistogramParams::HybridUintMethod::kFast;
    } else {
      params.uint_method = HistogramParams::HybridUintMethod::kNone;
    }
  } else if (speed <= SpeedTier::kTortoise) {
    params.lz77_method = HistogramParams::LZ77Method::kOptimal;
  } else {
    params.lz77_method = HistogramParams::LZ77Method::kLZ77;
  }

  // Overrides that depend on the requested decoding speed tier.
  if (cparams_.decoding_speed_tier >= 1) {
    params.max_histograms = 12;
    if (cparams_.responsive) {
      if (speed >= SpeedTier::kCheetah) {
        params.lz77_method = HistogramParams::LZ77Method::kRLE;
      } else if (speed >= SpeedTier::kKitten) {
        params.lz77_method = HistogramParams::LZ77Method::kLZ77;
      } else {
        params.lz77_method = HistogramParams::LZ77Method::kOptimal;
      }
      if (cparams_.decoding_speed_tier >= 2) {
        params.uint_method = HistogramParams::HybridUintMethod::k000;
        params.force_huffman = true;
      }
    }
  }

  BuildAndEncodeHistograms(params, kNumTreeContexts, tree_tokens_, &code_,
                           &context_map_, writer, kLayerModularTree, aux_out);
  WriteTokens(tree_tokens_[0], code_, context_map_, writer, kLayerModularTree,
              aux_out);

  // Write histograms. `code_` and `context_map_` are overwritten here and are
  // what EncodeStream uses afterwards.
  params.image_widths = image_widths_;
  const size_t num_contexts = (tree_.size() + 1) / 2;
  BuildAndEncodeHistograms(params, num_contexts, tokens_, &code_,
                           &context_map_, writer, kLayerModularGlobal, aux_out);
  return true;
}
1188
1189
// Writes the header and the tokens of one modular stream to `writer`,
// charging the bits to `layer`. A stream whose image has no channels writes
// nothing. Fails only if writing the stream header fails.
Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out,
                                         size_t layer,
                                         const ModularStreamId& stream) {
  const size_t id = stream.ID(frame_dim_);
  // Image with no channels: header never gets decoded, so skip it entirely.
  if (!stream_images_[id].channel.empty()) {
    JXL_RETURN_IF_ERROR(
        Bundle::Write(stream_headers_[id], writer, layer, aux_out));
    WriteTokens(tokens_[id], code_, context_map_, writer, layer, aux_out);
  }
  return true;
}
1201
1202
namespace {
1203
0
float EstimateWPCost(const Image& img, size_t i) {
1204
0
  size_t extra_bits = 0;
1205
0
  float histo_cost = 0;
1206
0
  HybridUintConfig config;
1207
0
  int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31,
1208
0
                       -23,  -15,  -11,  -7,   -4,   -3,  -1,  0,   1,
1209
0
                       3,    5,    7,    11,   15,   23,  31,  47,  63,
1210
0
                       95,   127,  191,  255,  392,  500};
1211
0
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
1212
0
  Histogram histo[nc] = {};
1213
0
  weighted::Header wp_header;
1214
0
  PredictorMode(i, &wp_header);
1215
0
  for (const Channel& ch : img.channel) {
1216
0
    const intptr_t onerow = ch.plane.PixelsPerRow();
1217
0
    weighted::State wp_state(wp_header, ch.w, ch.h);
1218
0
    Properties properties(1);
1219
0
    for (size_t y = 0; y < ch.h; y++) {
1220
0
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
1221
0
      for (size_t x = 0; x < ch.w; x++) {
1222
0
        size_t offset = 0;
1223
0
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
1224
0
        pixel_type_w top = (y ? *(r + x - onerow) : left);
1225
0
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
1226
0
        pixel_type_w topright =
1227
0
            (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top);
1228
0
        pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
1229
0
        pixel_type guess = wp_state.Predict</*compute_properties=*/true>(
1230
0
            x, y, ch.w, top, left, topright, topleft, toptop, &properties,
1231
0
            offset);
1232
0
        size_t ctx = 0;
1233
0
        for (int c : cutoffs) {
1234
0
          ctx += c >= properties[0];
1235
0
        }
1236
0
        pixel_type res = r[x] - guess;
1237
0
        uint32_t token, nbits, bits;
1238
0
        config.Encode(PackSigned(res), &token, &nbits, &bits);
1239
0
        histo[ctx].Add(token);
1240
0
        extra_bits += nbits;
1241
0
        wp_state.UpdateErrors(r[x], x, y, ch.w);
1242
0
      }
1243
0
    }
1244
0
    for (size_t h = 0; h < nc; h++) {
1245
0
      histo_cost += histo[h].ShannonEntropy();
1246
0
      histo[h].Clear();
1247
0
    }
1248
0
  }
1249
0
  return histo_cost + extra_bits;
1250
0
}
1251
1252
0
float EstimateCost(const Image& img) {
1253
  // TODO(veluca): consider SIMDfication of this code.
1254
0
  size_t extra_bits = 0;
1255
0
  float histo_cost = 0;
1256
0
  HybridUintConfig config;
1257
0
  uint32_t cutoffs[] = {0,  1,  3,  5,   7,   11,  15,  23, 31,
1258
0
                        47, 63, 95, 127, 191, 255, 392, 500};
1259
0
  constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
1260
0
  Histogram histo[nc] = {};
1261
0
  for (const Channel& ch : img.channel) {
1262
0
    const intptr_t onerow = ch.plane.PixelsPerRow();
1263
0
    for (size_t y = 0; y < ch.h; y++) {
1264
0
      const pixel_type* JXL_RESTRICT r = ch.Row(y);
1265
0
      for (size_t x = 0; x < ch.w; x++) {
1266
0
        pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
1267
0
        pixel_type_w top = (y ? *(r + x - onerow) : left);
1268
0
        pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
1269
0
        size_t maxdiff = std::max(std::max(left, top), topleft) -
1270
0
                         std::min(std::min(left, top), topleft);
1271
0
        size_t ctx = 0;
1272
0
        for (uint32_t c : cutoffs) {
1273
0
          ctx += c > maxdiff;
1274
0
        }
1275
0
        pixel_type res = r[x] - ClampedGradient(top, left, topleft);
1276
0
        uint32_t token, nbits, bits;
1277
0
        config.Encode(PackSigned(res), &token, &nbits, &bits);
1278
0
        histo[ctx].Add(token);
1279
0
        extra_bits += nbits;
1280
0
      }
1281
0
    }
1282
0
    for (size_t h = 0; h < nc; h++) {
1283
0
      histo_cost += histo[h].ShannonEntropy();
1284
0
      histo[h].Clear();
1285
0
    }
1286
0
  }
1287
0
  return histo_cost + extra_bits;
1288
0
}
1289
1290
}  // namespace
1291
1292
// Prepares the image of one modular stream and picks per-stream parameters.
//
// For stream ids other than 0, the stream image is rebuilt from the full
// image (stream 0): starting at the first non-meta channel that is larger
// than the group dimension, every channel whose min(hshift, vshift) lies in
// [minShift, maxShift] contributes the part covered by `rect` (scaled by the
// channel's shifts). Palette transforms are then attempted on that local
// image via do_transform().
//
// For every stream, a set of RCTs may be tried (keeping the one with the
// lowest EstimateCost) and, for weighted-predictor based streams, the
// weighted predictor mode with the lowest EstimateWPCost is selected.
//
// Returns true on success; an error is propagated if undoing a trial RCT
// fails.
Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect,
                                                const CompressParams& cparams_,
                                                int minShift, int maxShift,
                                                const ModularStreamId& stream,
                                                bool do_color) {
  const size_t stream_id = stream.ID(frame_dim_);
  const size_t xsize = rect.xsize();
  const size_t ysize = rect.ysize();
  Image& full_image = stream_images_[0];
  Image& gi = stream_images_[stream_id];

  if (stream_id > 0) {
    gi = Image(xsize, ysize, full_image.bitdepth, 0);

    // start at the first bigger-than-frame_dim.group_dim non-metachannel
    size_t c = full_image.nb_meta_channels;
    while (c < full_image.channel.size()) {
      Channel& candidate = full_image.channel[c];
      if (candidate.w > frame_dim_.group_dim ||
          candidate.h > frame_dim_.group_dim) {
        break;
      }
      c++;
    }

    // Copy the relevant region of every remaining channel whose shift is in
    // the requested range.
    for (; c < full_image.channel.size(); c++) {
      Channel& src = full_image.channel[c];
      const int shift = std::min(src.hshift, src.vshift);
      if (shift > maxShift || shift < minShift) continue;
      Rect region(rect.x0() >> src.hshift, rect.y0() >> src.vshift,
                  rect.xsize() >> src.hshift, rect.ysize() >> src.vshift,
                  src.w, src.h);
      if (region.xsize() == 0 || region.ysize() == 0) continue;
      gi_channel_[stream_id].push_back(c);
      Channel dst(region.xsize(), region.ysize());
      dst.hshift = src.hshift;
      dst.vshift = src.vshift;
      for (size_t y = 0; y < region.ysize(); ++y) {
        memcpy(dst.Row(y), region.ConstRow(src.plane, y),
               region.xsize() * sizeof(pixel_type));
      }
      gi.channel.emplace_back(std::move(dst));
    }

    if (gi.channel.empty()) return true;
    // Do some per-group transforms

    const bool zero_distance = cparams_.butteraugli_distance == 0.f;
    const bool tier_below_cheetah = cparams_.speed_tier < SpeedTier::kCheetah;

    // Local palette
    // TODO(veluca): make this work with quantize-after-prediction in lossy
    // mode.
    if (zero_distance && cparams_.palette_colors != 0 && tier_below_cheetah) {
      // all-channel palette (e.g. RGBA)
      // NOTE: the channel counts are re-read before each attempt because a
      // transform may change the channel list.
      if (gi.channel.size() - gi.nb_meta_channels > 1) {
        Transform all_channels_palette(TransformId::kPalette);
        all_channels_palette.begin_c = gi.nb_meta_channels;
        all_channels_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
        all_channels_palette.nb_colors = std::abs(cparams_.palette_colors);
        all_channels_palette.ordered_palette = cparams_.palette_colors >= 0;
        do_transform(gi, all_channels_palette, weighted::Header());
      }
      // all-minus-one-channel palette (RGB with separate alpha, or CMY with
      // separate K)
      if (gi.channel.size() - gi.nb_meta_channels > 3) {
        Transform all_but_last_palette(TransformId::kPalette);
        all_but_last_palette.begin_c = gi.nb_meta_channels;
        all_but_last_palette.num_c =
            gi.channel.size() - gi.nb_meta_channels - 1;
        all_but_last_palette.nb_colors = std::abs(cparams_.palette_colors);
        all_but_last_palette.ordered_palette = cparams_.palette_colors >= 0;
        all_but_last_palette.lossy_palette = cparams_.lossy_palette;
        if (all_but_last_palette.lossy_palette) {
          all_but_last_palette.predictor = Predictor::Weighted;
        }
        do_transform(gi, all_but_last_palette, weighted::Header());
      }
    }

    // Local channel palette
    const bool responsive_with_fast_decode =
        cparams_.responsive && cparams_.decoding_speed_tier >= 1;
    if (cparams_.channel_colors_percent > 0 && zero_distance &&
        !cparams_.lossy_palette && tier_below_cheetah &&
        !responsive_with_fast_decode) {
      // single channel palette (like FLIF's ChannelCompact)
      const size_t nb_channels = gi.channel.size() - gi.nb_meta_channels;
      for (size_t i = 0; i < nb_channels; i++) {
        int32_t lowest, highest;
        compute_minmax(gi.channel[gi.nb_meta_channels + i], &lowest, &highest);
        const int colors = highest - lowest + 1;
        JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, lowest,
                    highest);
        Transform single_channel_palette(TransformId::kPalette);
        single_channel_palette.begin_c = i + gi.nb_meta_channels;
        single_channel_palette.num_c = 1;
        // simple heuristic: if less than X percent of the values in the range
        // actually occur, it is probably worth it to do a compaction
        // (but only if the channel palette is less than 80% the size of the
        // image itself)
        const int size_limit = static_cast<int>(xsize * ysize * 0.8);
        const int percent_limit = static_cast<int>(
            cparams_.channel_colors_percent / 100. * colors);
        single_channel_palette.nb_colors = std::min(size_limit, percent_limit);
        do_transform(gi, single_channel_palette, weighted::Header());
      }
    }
  }

  // lossless and no specific color transform specified: try Nothing, YCoCg,
  // and 17 RCTs
  const bool search_rct =
      cparams_.color_transform == ColorTransform::kNone &&
      cparams_.IsLossless() && cparams_.colorspace < 0 &&
      gi.channel.size() - gi.nb_meta_channels >= 3 &&
      cparams_.responsive == false && do_color &&
      cparams_.speed_tier <= SpeedTier::kHare;
  if (search_rct) {
    Transform sg(TransformId::kRCT);
    sg.begin_c = gi.nb_meta_channels;

    // How many of the candidates below get evaluated for this speed tier.
    size_t nb_rcts_to_try = 0;
    switch (cparams_.speed_tier) {
      case SpeedTier::kLightning:
      case SpeedTier::kThunder:
      case SpeedTier::kFalcon:
      case SpeedTier::kCheetah:
        nb_rcts_to_try = 0;  // Just do global YCoCg
        break;
      case SpeedTier::kHare:
        nb_rcts_to_try = 4;
        break;
      case SpeedTier::kWombat:
        nb_rcts_to_try = 5;
        break;
      case SpeedTier::kSquirrel:
        nb_rcts_to_try = 7;
        break;
      case SpeedTier::kKitten:
        nb_rcts_to_try = 9;
        break;
      case SpeedTier::kTortoise:
        nb_rcts_to_try = 19;
        break;
    }

    // These should be 19 actually different transforms; the remaining ones
    // are equivalent to one of these (note that the first two are do-nothing
    // and YCoCg) modulo channel reordering (which only matters in the case of
    // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR)
    const int rct_candidates[] = {
        0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5,
        5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4,
        1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4,
        4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3};

    float best_cost = std::numeric_limits<float>::max();
    size_t best_rct = 0;
    size_t nb_tried = 0;
    for (int candidate : rct_candidates) {
      if (nb_tried == nb_rcts_to_try) break;
      nb_tried++;
      sg.rct_type = candidate;
      if (!do_transform(gi, sg, weighted::Header())) continue;
      const float cost = EstimateCost(gi);
      if (cost < best_cost) {
        best_rct = candidate;
        best_cost = cost;
      }
      // Undo the trial transform so the next candidate starts from the same
      // image.
      Transform applied = gi.transform.back();
      JXL_RETURN_IF_ERROR(applied.Inverse(gi, weighted::Header(), nullptr));
      gi.transform.pop_back();
    }
    // Apply the best RCT to the image for future encoding.
    sg.rct_type = best_rct;
    do_transform(gi, sg, weighted::Header());
  }
  // Otherwise: no need to try anything, just use the default options.

  // Number of weighted-predictor modes to compare, depending on speed tier.
  size_t nb_wp_modes = 1;
  if (cparams_.speed_tier <= SpeedTier::kTortoise) {
    nb_wp_modes = 5;
  } else if (cparams_.speed_tier <= SpeedTier::kKitten) {
    nb_wp_modes = 2;
  }

  if (nb_wp_modes > 1) {
    auto& options = stream_options_[stream_id];
    const bool may_use_weighted = options.predictor == Predictor::Weighted ||
                                  options.predictor == Predictor::Best ||
                                  options.predictor == Predictor::Variable;
    if (may_use_weighted) {
      float best_cost = std::numeric_limits<float>::max();
      options.wp_mode = 0;
      for (size_t mode = 0; mode < nb_wp_modes; mode++) {
        const float cost = EstimateWPCost(gi, mode);
        if (cost < best_cost) {
          best_cost = cost;
          options.wp_mode = mode;
        }
      }
    }
  }
  return true;
}
1474
1475
constexpr float q_deadzone = 0.62f;
1476
int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y,
1477
               size_t w, weighted::State* wp_state, float value,
1478
0
               float inv_factor) {
1479
0
  float svalue = value * inv_factor;
1480
0
  PredictionResult pred =
1481
0
      PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state);
1482
0
  svalue -= pred.guess;
1483
0
  if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
1484
0
  int residual = roundf(svalue);
1485
0
  if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2;
1486
0
  return residual + pred.guess;
1487
0
}
1488
1489
int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x,
1490
0
                     size_t y, size_t w, float value, float inv_factor) {
1491
0
  float svalue = value * inv_factor;
1492
0
  PredictionResult pred =
1493
0
      PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient);
1494
0
  svalue -= pred.guess;
1495
0
  if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
1496
0
  int residual = roundf(svalue);
1497
0
  if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2;
1498
0
  return residual + pred.guess;
1499
0
}
1500
1501
void ModularFrameEncoder::AddVarDCTDC(const Image3F& dc, size_t group_index,
1502
                                      bool nl_dc, PassesEncoderState* enc_state,
1503
92
                                      bool jpeg_transcode) {
1504
92
  const Rect r = enc_state->shared.DCGroupRect(group_index);
1505
92
  extra_dc_precision[group_index] = nl_dc ? 1 : 0;
1506
92
  float mul = 1 << extra_dc_precision[group_index];
1507
1508
92
  size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_);
1509
92
  stream_options_[stream_id].max_chan_size = 0xFFFFFF;
1510
92
  stream_options_[stream_id].predictor = Predictor::Weighted;
1511
92
  stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
1512
92
  if (cparams_.speed_tier >= SpeedTier::kSquirrel) {
1513
92
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
1514
92
  }
1515
92
  if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) {
1516
0
    stream_options_[stream_id].predictor =
1517
0
        (cparams_.speed_tier < SpeedTier::kKitten ? Predictor::Variable
1518
0
                                                  : Predictor::Best);
1519
0
    stream_options_[stream_id].wp_tree_mode =
1520
0
        ModularOptions::TreeMode::kDefault;
1521
0
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
1522
0
  }
1523
92
  if (cparams_.decoding_speed_tier >= 1) {
1524
0
    stream_options_[stream_id].tree_kind =
1525
0
        ModularOptions::TreeKind::kGradientFixedDC;
1526
0
  }
1527
1528
92
  stream_images_[stream_id] = Image(r.xsize(), r.ysize(), 8, 3);
1529
92
  if (nl_dc && stream_options_[stream_id].tree_kind ==
1530
0
                   ModularOptions::TreeKind::kGradientFixedDC) {
1531
0
    JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444());
1532
0
    for (size_t c : {1, 0, 2}) {
1533
0
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1534
0
      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
1535
0
      float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
1536
0
      for (size_t y = 0; y < r.ysize(); y++) {
1537
0
        int32_t* quant_row =
1538
0
            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
1539
0
        size_t stride = stream_images_[stream_id]
1540
0
                            .channel[c < 2 ? c ^ 1 : c]
1541
0
                            .plane.PixelsPerRow();
1542
0
        const float* row = r.ConstPlaneRow(dc, c, y);
1543
0
        if (c == 1) {
1544
0
          for (size_t x = 0; x < r.xsize(); x++) {
1545
0
            quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y,
1546
0
                                            r.xsize(), row[x], inv_factor);
1547
0
          }
1548
0
        } else {
1549
0
          int32_t* quant_row_y =
1550
0
              stream_images_[stream_id].channel[0].plane.Row(y);
1551
0
          for (size_t x = 0; x < r.xsize(); x++) {
1552
0
            quant_row[x] = QuantizeGradient(
1553
0
                quant_row, stride, c, x, y, r.xsize(),
1554
0
                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
1555
0
          }
1556
0
        }
1557
0
      }
1558
0
    }
1559
92
  } else if (nl_dc) {
1560
0
    JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444());
1561
0
    for (size_t c : {1, 0, 2}) {
1562
0
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1563
0
      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
1564
0
      float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
1565
0
      weighted::Header header;
1566
0
      weighted::State wp_state(header, r.xsize(), r.ysize());
1567
0
      for (size_t y = 0; y < r.ysize(); y++) {
1568
0
        int32_t* quant_row =
1569
0
            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
1570
0
        size_t stride = stream_images_[stream_id]
1571
0
                            .channel[c < 2 ? c ^ 1 : c]
1572
0
                            .plane.PixelsPerRow();
1573
0
        const float* row = r.ConstPlaneRow(dc, c, y);
1574
0
        if (c == 1) {
1575
0
          for (size_t x = 0; x < r.xsize(); x++) {
1576
0
            quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(),
1577
0
                                      &wp_state, row[x], inv_factor);
1578
0
            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
1579
0
          }
1580
0
        } else {
1581
0
          int32_t* quant_row_y =
1582
0
              stream_images_[stream_id].channel[0].plane.Row(y);
1583
0
          for (size_t x = 0; x < r.xsize(); x++) {
1584
0
            quant_row[x] = QuantizeWP(
1585
0
                quant_row, stride, c, x, y, r.xsize(), &wp_state,
1586
0
                row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
1587
0
            wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
1588
0
          }
1589
0
        }
1590
0
      }
1591
0
    }
1592
92
  } else if (enc_state->shared.frame_header.chroma_subsampling.Is444()) {
1593
276
    for (size_t c : {1, 0, 2}) {
1594
276
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1595
276
      float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
1596
276
      float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
1597
552
      for (size_t y = 0; y < r.ysize(); y++) {
1598
276
        int32_t* quant_row =
1599
276
            stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
1600
276
        const float* row = r.ConstPlaneRow(dc, c, y);
1601
276
        if (c == 1) {
1602
184
          for (size_t x = 0; x < r.xsize(); x++) {
1603
92
            quant_row[x] = roundf(row[x] * inv_factor);
1604
92
          }
1605
184
        } else {
1606
184
          int32_t* quant_row_y =
1607
184
              stream_images_[stream_id].channel[0].plane.Row(y);
1608
368
          for (size_t x = 0; x < r.xsize(); x++) {
1609
184
            quant_row[x] =
1610
184
                roundf((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) *
1611
184
                       inv_factor);
1612
184
          }
1613
184
        }
1614
276
      }
1615
276
    }
1616
92
  } else {
1617
0
    for (size_t c : {1, 0, 2}) {
1618
0
      Rect rect(
1619
0
          r.x0() >> enc_state->shared.frame_header.chroma_subsampling.HShift(c),
1620
0
          r.y0() >> enc_state->shared.frame_header.chroma_subsampling.VShift(c),
1621
0
          r.xsize() >>
1622
0
              enc_state->shared.frame_header.chroma_subsampling.HShift(c),
1623
0
          r.ysize() >>
1624
0
              enc_state->shared.frame_header.chroma_subsampling.VShift(c));
1625
0
      float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
1626
0
      size_t ys = rect.ysize();
1627
0
      size_t xs = rect.xsize();
1628
0
      Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c];
1629
0
      ch.w = xs;
1630
0
      ch.h = ys;
1631
0
      ch.shrink();
1632
0
      for (size_t y = 0; y < ys; y++) {
1633
0
        int32_t* quant_row = ch.plane.Row(y);
1634
0
        const float* row = rect.ConstPlaneRow(dc, c, y);
1635
0
        for (size_t x = 0; x < xs; x++) {
1636
0
          quant_row[x] = roundf(row[x] * inv_factor);
1637
0
        }
1638
0
      }
1639
0
    }
1640
0
  }
1641
1642
92
  DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc,
1643
92
            stream_images_[stream_id], enc_state->shared.quantizer.MulDC(),
1644
92
            1.0 / mul, enc_state->shared.cmap.DCFactors(),
1645
92
            enc_state->shared.frame_header.chroma_subsampling,
1646
92
            enc_state->shared.block_ctx_map);
1647
92
}
1648
1649
void ModularFrameEncoder::AddACMetadata(size_t group_index, bool jpeg_transcode,
1650
92
                                        PassesEncoderState* enc_state) {
1651
92
  const Rect r = enc_state->shared.DCGroupRect(group_index);
1652
92
  size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_);
1653
92
  stream_options_[stream_id].max_chan_size = 0xFFFFFF;
1654
92
  stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP;
1655
92
  if (jpeg_transcode) {
1656
0
    stream_options_[stream_id].tree_kind =
1657
0
        ModularOptions::TreeKind::kJpegTranscodeACMeta;
1658
92
  } else if (cparams_.speed_tier >= SpeedTier::kFalcon) {
1659
0
    stream_options_[stream_id].tree_kind =
1660
0
        ModularOptions::TreeKind::kFalconACMeta;
1661
92
  } else if (cparams_.speed_tier > SpeedTier::kKitten) {
1662
92
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta;
1663
92
  }
1664
  // If we are using a non-constant CfL field, and are in a slow enough mode,
1665
  // re-enable tree computation for it.
1666
92
  if (cparams_.speed_tier < SpeedTier::kSquirrel &&
1667
92
      cparams_.force_cfl_jpeg_recompression) {
1668
0
    stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
1669
0
  }
1670
  // YToX, YToB, ACS + QF, EPF
1671
92
  Image& image = stream_images_[stream_id];
1672
92
  image = Image(r.xsize(), r.ysize(), 8, 4);
1673
92
  static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
1674
92
  Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
1675
92
  image.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3);
1676
92
  image.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3);
1677
92
  image.channel[2] = Channel(r.xsize() * r.ysize(), 2, 0, 0);
1678
92
  ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map,
1679
92
                       Rect(image.channel[0].plane), &image.channel[0].plane);
1680
92
  ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map,
1681
92
                       Rect(image.channel[1].plane), &image.channel[1].plane);
1682
92
  size_t num = 0;
1683
184
  for (size_t y = 0; y < r.ysize(); y++) {
1684
92
    AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y);
1685
92
    const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y);
1686
92
    const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y);
1687
92
    int32_t* out_acs = image.channel[2].plane.Row(0);
1688
92
    int32_t* out_qf = image.channel[2].plane.Row(1);
1689
92
    int32_t* row_out_epf = image.channel[3].plane.Row(y);
1690
184
    for (size_t x = 0; x < r.xsize(); x++) {
1691
92
      row_out_epf[x] = row_epf[x];
1692
92
      if (!row_acs[x].IsFirstBlock()) continue;
1693
92
      out_acs[num] = row_acs[x].RawStrategy();
1694
92
      out_qf[num] = row_qf[x] - 1;
1695
92
      num++;
1696
92
    }
1697
92
  }
1698
92
  image.channel[2].w = num;
1699
92
  ac_metadata_size[group_index] = num;
1700
92
}
1701
1702
void ModularFrameEncoder::EncodeQuantTable(
1703
    size_t size_x, size_t size_y, BitWriter* writer,
1704
    const QuantEncoding& encoding, size_t idx,
1705
0
    ModularFrameEncoder* modular_frame_encoder) {
1706
0
  JXL_ASSERT(encoding.qraw.qtable != nullptr);
1707
0
  JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size());
1708
0
  JXL_CHECK(F16Coder::Write(encoding.qraw.qtable_den, writer));
1709
0
  if (modular_frame_encoder) {
1710
0
    JXL_CHECK(modular_frame_encoder->EncodeStream(
1711
0
        writer, nullptr, 0, ModularStreamId::QuantTable(idx)));
1712
0
    return;
1713
0
  }
1714
0
  Image image(size_x, size_y, 8, 3);
1715
0
  for (size_t c = 0; c < 3; c++) {
1716
0
    for (size_t y = 0; y < size_y; y++) {
1717
0
      int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
1718
0
      for (size_t x = 0; x < size_x; x++) {
1719
0
        row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x];
1720
0
      }
1721
0
    }
1722
0
  }
1723
0
  ModularOptions cfopts;
1724
0
  JXL_CHECK(ModularGenericCompress(image, cfopts, writer));
1725
0
}
1726
1727
void ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y,
1728
                                        const QuantEncoding& encoding,
1729
0
                                        size_t idx) {
1730
0
  size_t stream_id = ModularStreamId::QuantTable(idx).ID(frame_dim_);
1731
0
  JXL_ASSERT(encoding.qraw.qtable != nullptr);
1732
0
  JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size());
1733
0
  Image& image = stream_images_[stream_id];
1734
0
  image = Image(size_x, size_y, 8, 3);
1735
0
  for (size_t c = 0; c < 3; c++) {
1736
0
    for (size_t y = 0; y < size_y; y++) {
1737
0
      int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
1738
0
      for (size_t x = 0; x < size_x; x++) {
1739
0
        row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x];
1740
0
      }
1741
0
    }
1742
0
  }
1743
0
}
1744
}  // namespace jxl