Coverage Report

Created: 2025-06-22 08:04

/src/libjxl/lib/jxl/enc_group.cc
Every instrumented line below has an execution count of 0; nothing in this file was covered. The source listing follows.
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/enc_group.h"

#include <jxl/memory_manager.h>

#include "lib/jxl/base/status.h"
#include "lib/jxl/memory_manager_internal.h"

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/enc_group.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/bits.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/rect.h"
#include "lib/jxl/common.h"  // kMaxNumPasses
#include "lib/jxl/dct_util.h"
#include "lib/jxl/dec_transforms-inl.h"
#include "lib/jxl/enc_aux_out.h"
#include "lib/jxl/enc_cache.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/enc_transforms-inl.h"
#include "lib/jxl/image.h"
#include "lib/jxl/quantizer-inl.h"
#include "lib/jxl/quantizer.h"
#include "lib/jxl/simd_util.h"
HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {

// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Abs;
using hwy::HWY_NAMESPACE::Ge;
using hwy::HWY_NAMESPACE::IfThenElse;
using hwy::HWY_NAMESPACE::IfThenElseZero;
using hwy::HWY_NAMESPACE::MaskFromVec;
using hwy::HWY_NAMESPACE::Round;

// NOTE: caller takes care of extracting quant from rect of RawQuantField.
void QuantizeBlockAC(const Quantizer& quantizer, const bool error_diffusion,
                     size_t c, float qm_multiplier, AcStrategyType quant_kind,
                     size_t xsize, size_t ysize, float* thresholds,
                     const float* JXL_RESTRICT block_in, const int32_t* quant,
                     int32_t* JXL_RESTRICT block_out) {
  const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
  float qac = quantizer.Scale() * (*quant);
  // Not SIMD-ified for now.
  if (c != 1 && xsize * ysize >= 4) {
    for (int i = 0; i < 4; ++i) {
      thresholds[i] -= 0.00744f * xsize * ysize;
      if (thresholds[i] < 0.5) {
        thresholds[i] = 0.5;
      }
    }
  }
  HWY_CAPPED(float, kBlockDim) df;
  HWY_CAPPED(int32_t, kBlockDim) di;
  HWY_CAPPED(uint32_t, kBlockDim) du;
  const auto quantv = Set(df, qac * qm_multiplier);
  for (size_t y = 0; y < ysize * kBlockDim; y++) {
    size_t yfix = static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2;
    const size_t off = y * kBlockDim * xsize;
    for (size_t x = 0; x < xsize * kBlockDim; x += Lanes(df)) {
      auto threshold = Zero(df);
      if (xsize == 1) {
        HWY_ALIGN uint32_t kMask[kBlockDim] = {0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u};
        const auto mask = MaskFromVec(BitCast(df, Load(du, kMask + x)));
        threshold = IfThenElse(mask, Set(df, thresholds[yfix + 1]),
                               Set(df, thresholds[yfix]));
      } else {
        // Same for all lanes in the vector.
        threshold = Set(
            df,
            thresholds[yfix + static_cast<size_t>(x >= xsize * kBlockDim / 2)]);
      }
      const auto q = Mul(Load(df, qm + off + x), quantv);
      const auto in = Load(df, block_in + off + x);
      const auto val = Mul(q, in);
      const auto nzero_mask = Ge(Abs(val), threshold);
      const auto v = ConvertTo(di, IfThenElseZero(nzero_mask, Round(val)));
      Store(v, di, block_out + off + x);
    }
  }
}

void AdjustQuantBlockAC(const Quantizer& quantizer, size_t c,
                        float qm_multiplier, AcStrategyType quant_kind,
                        size_t xsize, size_t ysize, float* thresholds,
                        const float* JXL_RESTRICT block_in, int32_t* quant) {
  // No quantization adjusting for these small blocks.
  // Quantization adjusting attempts to fix some known issues
  // with larger blocks and on the 8x8 dct's emerging 8x8 blockiness
  // when there are not many non-zeros.
  constexpr size_t kPartialBlockKinds =
      (1 << static_cast<size_t>(AcStrategyType::IDENTITY)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT2X2)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT4X4)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT4X8)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT8X4)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV0)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV1)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV2)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV3));
  if ((1 << static_cast<size_t>(quant_kind)) & kPartialBlockKinds) {
    return;
  }

  const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
  float qac = quantizer.Scale() * (*quant);
  if (xsize > 1 || ysize > 1) {
    for (int i = 0; i < 4; ++i) {
      thresholds[i] -= Clamp1(0.003f * xsize * ysize, 0.f, 0.08f);
      if (thresholds[i] < 0.54) {
        thresholds[i] = 0.54;
      }
    }
  }
  float sum_of_highest_freq_row_and_column = 0;
  float sum_of_error = 0;
  float sum_of_vals = 0;
  float hfNonZeros[4] = {};
  float hfMaxError[4] = {};

  for (size_t y = 0; y < ysize * kBlockDim; y++) {
    for (size_t x = 0; x < xsize * kBlockDim; x++) {
      const size_t pos = y * kBlockDim * xsize + x;
      if (x < xsize && y < ysize) {
        continue;
      }
      const size_t hfix = (static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2 +
                           static_cast<size_t>(x >= xsize * kBlockDim / 2));
      const float val = block_in[pos] * (qm[pos] * qac * qm_multiplier);
      const float v = (std::abs(val) < thresholds[hfix]) ? 0 : rintf(val);
      const float error = std::abs(val - v);
      sum_of_error += error;
      sum_of_vals += std::abs(v);
      if (c == 1 && v == 0) {
        if (hfMaxError[hfix] < error) {
          hfMaxError[hfix] = error;
        }
      }
      if (v != 0.0f) {
        hfNonZeros[hfix] += std::abs(v);
        bool in_corner = y >= 7 * ysize && x >= 7 * xsize;
        bool on_border =
            y == ysize * kBlockDim - 1 || x == xsize * kBlockDim - 1;
        bool in_larger_corner = x >= 4 * xsize && y >= 4 * ysize;
        if (in_corner || (on_border && in_larger_corner)) {
          sum_of_highest_freq_row_and_column += std::abs(val);
        }
      }
    }
  }
  if (c == 1 && sum_of_vals * 8 < xsize * ysize) {
    static const double kLimit[4] = {
        0.46,
        0.46,
        0.46,
        0.46,
    };
    static const double kMul[4] = {
        0.9999,
        0.9999,
        0.9999,
        0.9999,
    };
    const int32_t orig_quant = *quant;
    int32_t new_quant = *quant;
    for (int i = 1; i < 4; ++i) {
      if (hfNonZeros[i] == 0.0 && hfMaxError[i] > kLimit[i]) {
        new_quant = orig_quant + 1;
        break;
      }
    }
    *quant = new_quant;
    if (hfNonZeros[3] == 0.0 && hfMaxError[3] > kLimit[3]) {
      thresholds[3] = kMul[3] * hfMaxError[3] * new_quant / orig_quant;
    } else if ((hfNonZeros[1] == 0.0 && hfMaxError[1] > kLimit[1]) ||
               (hfNonZeros[2] == 0.0 && hfMaxError[2] > kLimit[2])) {
      thresholds[1] = kMul[1] * std::max(hfMaxError[1], hfMaxError[2]) *
                      new_quant / orig_quant;
      thresholds[2] = thresholds[1];
    } else if (hfNonZeros[0] == 0.0 && hfMaxError[0] > kLimit[0]) {
      thresholds[0] = kMul[0] * hfMaxError[0] * new_quant / orig_quant;
    }
  }
  // Heuristic for improving accuracy of high-frequency patterns
  // occurring in an environment with no medium-frequency masking
  // patterns.
  {
    float all =
        hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] + 1;
    float mul[3] = {70, 30, 60};
    if (mul[c] * sum_of_highest_freq_row_and_column >= all) {
      *quant += mul[c] * sum_of_highest_freq_row_and_column / all;
      if (*quant >= Quantizer::kQuantMax) {
        *quant = Quantizer::kQuantMax - 1;
      }
    }
  }
  if (quant_kind == AcStrategyType::DCT) {
    // If this 8x8 block is too flat, increase the adaptive quantization level
    // a bit to reduce visible block boundaries and requantize the block.
    if (hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] < 11) {
      *quant += 1;
      if (*quant >= Quantizer::kQuantMax) {
        *quant = Quantizer::kQuantMax - 1;
      }
    }
  }
  {
    static const double kMul1[4][3] = {
        {
            0.22080615753848404,
            0.45797479824262011,
            0.29859235095977965,
        },
        {
            0.70109486510286834,
            0.16185281305512639,
            0.14387691730035473,
        },
        {
            0.114985964456218638,
            0.44656840441027695,
            0.10587658215149048,
        },
        {
            0.46849665264409396,
            0.41239077937781954,
            0.088667407767185444,
        },
    };
    static const double kMul2[4][3] = {
        {
            0.27450281941822197,
            1.1255766549984996,
            0.98950459134128388,
        },
        {
            0.4652168675598285,
            0.40945807983455818,
            0.36581899811751367,
        },
        {
            0.28034972424715715,
            0.9182653201929738,
            1.5581531543057416,
        },
        {
            0.26873118114033728,
            0.68863712390392484,
            1.2082185408666786,
        },
    };
    static const double kQuantNormalizer = 2.2942708343284721;
    sum_of_error *= kQuantNormalizer;
    sum_of_vals *= kQuantNormalizer;
    if (quant_kind >= AcStrategyType::DCT16X16) {
      int ix = 3;
      if (quant_kind == AcStrategyType::DCT32X16 ||
          quant_kind == AcStrategyType::DCT16X32) {
        ix = 1;
      } else if (quant_kind == AcStrategyType::DCT16X16) {
        ix = 0;
      } else if (quant_kind == AcStrategyType::DCT32X32) {
        ix = 2;
      }
      int step =
          sum_of_error / (kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim +
                          kMul2[ix][c] * sum_of_vals);
      if (step >= 2) {
        step = 2;
      }
      if (step < 0) {
        step = 0;
      }
      if (sum_of_error > kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim +
                             kMul2[ix][c] * sum_of_vals) {
        *quant += step;
        if (*quant >= Quantizer::kQuantMax) {
          *quant = Quantizer::kQuantMax - 1;
        }
      }
    }
  }
  {
    // Reduce quant in highly active areas.
    int32_t div = (xsize * ysize);
    int32_t activity = (static_cast<int32_t>(hfNonZeros[0]) + div / 2) / div;
    int32_t orig_qp_limit = std::max(4, *quant / 2);
    for (int i = 1; i < 4; ++i) {
      activity = std::min(
          activity, (static_cast<int32_t>(hfNonZeros[i]) + div / 2) / div);
    }
    if (activity >= 15) {
      activity = 15;
    }
    int32_t qp = *quant - activity;
    if (c == 1) {
      for (int i = 1; i < 4; ++i) {
        thresholds[i] += 0.01 * activity;
      }
    }
    if (qp < orig_qp_limit) {
      qp = orig_qp_limit;
    }
    *quant = qp;
  }
}

// NOTE: caller takes care of extracting quant from rect of RawQuantField.
void QuantizeRoundtripYBlockAC(PassesEncoderState* enc_state, const size_t size,
                               const Quantizer& quantizer,
                               const bool error_diffusion,
                               AcStrategyType quant_kind, size_t xsize,
                               size_t ysize, const float* JXL_RESTRICT biases,
                               int32_t* quant, float* JXL_RESTRICT inout,
                               int32_t* JXL_RESTRICT quantized) {
  float thres_y[4] = {0.58f, 0.64f, 0.64f, 0.64f};
  if (enc_state->cparams.speed_tier <= SpeedTier::kHare) {
    int32_t max_quant = 0;
    int quant_orig = *quant;
    float val[3] = {enc_state->x_qm_multiplier, 1.0f,
                    enc_state->b_qm_multiplier};
    for (int c : {1, 0, 2}) {
      float thres[4] = {0.58f, 0.64f, 0.64f, 0.64f};
      *quant = quant_orig;
      AdjustQuantBlockAC(quantizer, c, val[c], quant_kind, xsize, ysize,
                         &thres[0], inout + c * size, quant);
      // Dead zone adjustment
      if (c == 1) {
        for (int k = 0; k < 4; ++k) {
          thres_y[k] = thres[k];
        }
      }
      max_quant = std::max(*quant, max_quant);
    }
    *quant = max_quant;
  } else {
    thres_y[0] = 0.56;
    thres_y[1] = 0.62;
    thres_y[2] = 0.62;
    thres_y[3] = 0.62;
  }

  QuantizeBlockAC(quantizer, error_diffusion, 1, 1.0f, quant_kind, xsize, ysize,
                  &thres_y[0], inout + size, quant, quantized + size);

  const float* JXL_RESTRICT dequant_matrix =
      quantizer.DequantMatrix(quant_kind, 1);

  HWY_CAPPED(float, kDCTBlockSize) df;
  HWY_CAPPED(int32_t, kDCTBlockSize) di;
  const auto inv_qac = Set(df, quantizer.inv_quant_ac(*quant));
  for (size_t k = 0; k < kDCTBlockSize * xsize * ysize; k += Lanes(df)) {
    const auto quant = Load(di, quantized + size + k);
    const auto adj_quant = AdjustQuantBias(di, 1, quant, biases);
    const auto dequantm = Load(df, dequant_matrix + k);
    Store(Mul(Mul(adj_quant, dequantm), inv_qac), df, inout + size + k);
  }
}

Status ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
                           const Image3F& opsin, const Rect& rect,
                           Image3F* dc) {
  JxlMemoryManager* memory_manager = opsin.memory_manager();
  const Rect block_group_rect =
      enc_state->shared.frame_dim.BlockGroupRect(group_idx);
  const Rect cmap_rect(
      block_group_rect.x0() / kColorTileDimInBlocks,
      block_group_rect.y0() / kColorTileDimInBlocks,
      DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
      DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
  const Rect group_rect =
      enc_state->shared.frame_dim.GroupRect(group_idx).Translate(rect.x0(),
                                                                 rect.y0());

  const size_t xsize_blocks = block_group_rect.xsize();
  const size_t ysize_blocks = block_group_rect.ysize();

  const size_t dc_stride = static_cast<size_t>(dc->PixelsPerRow());
  const size_t opsin_stride = static_cast<size_t>(opsin.PixelsPerRow());

  ImageI& full_quant_field = enc_state->shared.raw_quant_field;
  const CompressParams& cparams = enc_state->cparams;

  const size_t dct_scratch_size =
      3 * (MaxVectorSize() / sizeof(float)) * AcStrategy::kMaxBlockDim;

  // TODO(veluca): consider strategies to reduce this memory.
  size_t mem_bytes = 3 * AcStrategy::kMaxCoeffArea * sizeof(int32_t);
  JXL_ASSIGN_OR_RETURN(auto mem,
                       AlignedMemory::Create(memory_manager, mem_bytes));
  size_t fmem_bytes =
      (5 * AcStrategy::kMaxCoeffArea + dct_scratch_size) * sizeof(float);
  JXL_ASSIGN_OR_RETURN(auto fmem,
                       AlignedMemory::Create(memory_manager, fmem_bytes));
  float* JXL_RESTRICT scratch_space =
      fmem.address<float>() + 3 * AcStrategy::kMaxCoeffArea;
  {
    // Only use error diffusion in Squirrel mode or slower.
    const bool error_diffusion = cparams.speed_tier <= SpeedTier::kSquirrel;
    constexpr HWY_CAPPED(float, kDCTBlockSize) d;

    int32_t* JXL_RESTRICT coeffs[3][kMaxNumPasses] = {};
    size_t num_passes = enc_state->progressive_splitter.GetNumPasses();
    JXL_ENSURE(num_passes > 0);
    for (size_t i = 0; i < num_passes; i++) {
      // TODO(veluca): 16-bit quantized coeffs are not implemented yet.
      JXL_ENSURE(enc_state->coeffs[i]->Type() == ACType::k32);
      for (size_t c = 0; c < 3; c++) {
        coeffs[c][i] = enc_state->coeffs[i]->PlaneRow(c, group_idx, 0).ptr32;
      }
    }

    HWY_ALIGN float* coeffs_in = fmem.address<float>();
    HWY_ALIGN int32_t* quantized = mem.address<int32_t>();

    for (size_t by = 0; by < ysize_blocks; ++by) {
      int32_t* JXL_RESTRICT row_quant_ac =
          block_group_rect.Row(&full_quant_field, by);
      size_t ty = by / kColorTileDimInBlocks;
      const int8_t* JXL_RESTRICT row_cmap[3] = {
          cmap_rect.ConstRow(enc_state->shared.cmap.ytox_map, ty),
          nullptr,
          cmap_rect.ConstRow(enc_state->shared.cmap.ytob_map, ty),
      };
      const float* JXL_RESTRICT opsin_rows[3] = {
          group_rect.ConstPlaneRow(opsin, 0, by * kBlockDim),
          group_rect.ConstPlaneRow(opsin, 1, by * kBlockDim),
          group_rect.ConstPlaneRow(opsin, 2, by * kBlockDim),
      };
      float* JXL_RESTRICT dc_rows[3] = {
          block_group_rect.PlaneRow(dc, 0, by),
          block_group_rect.PlaneRow(dc, 1, by),
          block_group_rect.PlaneRow(dc, 2, by),
      };
      AcStrategyRow ac_strategy_row =
          enc_state->shared.ac_strategy.ConstRow(block_group_rect, by);
      for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
           tx++) {
        const auto x_factor =
            Set(d, enc_state->shared.cmap.base().YtoXRatio(row_cmap[0][tx]));
        const auto b_factor =
            Set(d, enc_state->shared.cmap.base().YtoBRatio(row_cmap[2][tx]));
        for (size_t bx = tx * kColorTileDimInBlocks;
             bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks; ++bx) {
          const AcStrategy acs = ac_strategy_row[bx];
          if (!acs.IsFirstBlock()) continue;

          size_t xblocks = acs.covered_blocks_x();
          size_t yblocks = acs.covered_blocks_y();

          CoefficientLayout(&yblocks, &xblocks);

          size_t size = kDCTBlockSize * xblocks * yblocks;

          // DCT Y channel, roundtrip-quantize it and set DC.
          int32_t quant_ac = row_quant_ac[bx];
          for (size_t c : {0, 1, 2}) {
            TransformFromPixels(acs.Strategy(), opsin_rows[c] + bx * kBlockDim,
                                opsin_stride, coeffs_in + c * size,
                                scratch_space);
          }
          DCFromLowestFrequencies(acs.Strategy(), coeffs_in + size,
                                  dc_rows[1] + bx, dc_stride);

          QuantizeRoundtripYBlockAC(
              enc_state, size, enc_state->shared.quantizer, error_diffusion,
              acs.Strategy(), xblocks, yblocks, kDefaultQuantBias, &quant_ac,
              coeffs_in, quantized);

          // Unapply color correlation
          for (size_t k = 0; k < size; k += Lanes(d)) {
            const auto in_x = Load(d, coeffs_in + k);
            const auto in_y = Load(d, coeffs_in + size + k);
            const auto in_b = Load(d, coeffs_in + 2 * size + k);
            const auto out_x = NegMulAdd(x_factor, in_y, in_x);
            const auto out_b = NegMulAdd(b_factor, in_y, in_b);
            Store(out_x, d, coeffs_in + k);
            Store(out_b, d, coeffs_in + 2 * size + k);
          }

          // Quantize X and B channels and set DC.
          for (size_t c : {0, 2}) {
            float thres[4] = {0.58f, 0.62f, 0.62f, 0.62f};
            QuantizeBlockAC(enc_state->shared.quantizer, error_diffusion, c,
                            c == 0 ? enc_state->x_qm_multiplier
                                   : enc_state->b_qm_multiplier,
                            acs.Strategy(), xblocks, yblocks, &thres[0],
                            coeffs_in + c * size, &quant_ac,
                            quantized + c * size);
            DCFromLowestFrequencies(acs.Strategy(), coeffs_in + c * size,
                                    dc_rows[c] + bx, dc_stride);
          }
          row_quant_ac[bx] = quant_ac;
          for (size_t c = 0; c < 3; c++) {
            enc_state->progressive_splitter.SplitACCoefficients(
                quantized + c * size, acs, bx, by, coeffs[c]);
            for (size_t p = 0; p < num_passes; p++) {
              coeffs[c][p] += size;
            }
          }
        }
      }
    }
  }
  return true;
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace jxl
HWY_AFTER_NAMESPACE();

#if HWY_ONCE
namespace jxl {
HWY_EXPORT(ComputeCoefficients);
Status ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
                           const Image3F& opsin, const Rect& rect,
                           Image3F* dc) {
  return HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin,
                                                   rect, dc);
}

Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
                                        size_t histogram_idx,
                                        const PassesEncoderState& enc_state,
                                        BitWriter* writer, AuxOut* aux_out) {
  // Select which histogram to use among those of the current pass.
  const size_t num_histograms = enc_state.shared.num_histograms;
  // num_histograms is 0 only for lossless.
  JXL_ENSURE(num_histograms == 0 || histogram_idx < num_histograms);
  size_t histo_selector_bits = CeilLog2Nonzero(num_histograms);

  if (histo_selector_bits != 0) {
    JXL_RETURN_IF_ERROR(
        writer->WithMaxBits(histo_selector_bits, LayerType::Ac, aux_out, [&] {
          writer->Write(histo_selector_bits, histogram_idx);
          return true;
        }));
  }
  size_t context_offset =
      histogram_idx * enc_state.shared.block_ctx_map.NumACContexts();
  JXL_RETURN_IF_ERROR(WriteTokens(
      enc_state.passes[pass_idx].ac_tokens[group_idx],
      enc_state.passes[pass_idx].codes, enc_state.passes[pass_idx].context_map,
      context_offset, writer, LayerType::AcTokens, aux_out));

  return true;
}

}  // namespace jxl
#endif  // HWY_ONCE
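
For reference, the dead-zone rule that QuantizeBlockAC and AdjustQuantBlockAC apply per AC coefficient can be read directly off the listing above: the coefficient is scaled by the inverse-dequant matrix entry, the quant step, and the channel multiplier; values whose scaled magnitude stays below the per-area threshold are zeroed, and everything else is rounded to the nearest integer. The scalar sketch below is not part of enc_group.cc; the function name and the example numbers are invented for illustration, and the real code performs this with Highway SIMD vectors.

// Minimal scalar sketch of the dead-zone quantization step (illustrative only).
#include <cmath>
#include <cstdint>
#include <cstdio>

int32_t QuantizeCoeffSketch(float coeff, float inv_dequant, float qac,
                            float qm_multiplier, float threshold) {
  // Scale the coefficient as QuantizeBlockAC does: qm[pos] * qac * qm_multiplier.
  const float val = coeff * inv_dequant * qac * qm_multiplier;
  // Dead zone: values below the threshold are dropped to zero.
  if (std::abs(val) < threshold) return 0;
  // Otherwise round to the nearest integer.
  return static_cast<int32_t>(std::lrintf(val));
}

int main() {
  // Hypothetical numbers, chosen only to show the zeroing behaviour.
  std::printf("%d\n", QuantizeCoeffSketch(0.02f, 8.0f, 1.5f, 1.0f, 0.6f));  // prints 0
  std::printf("%d\n", QuantizeCoeffSketch(0.09f, 8.0f, 1.5f, 1.0f, 0.6f));  // prints 1
  return 0;
}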