/src/libjxl/lib/jxl/enc_group.cc
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/enc_group.h"

#include <jxl/memory_manager.h>

#include "lib/jxl/base/status.h"
#include "lib/jxl/memory_manager_internal.h"

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/enc_group.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/bits.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/rect.h"
#include "lib/jxl/common.h"  // kMaxNumPasses
#include "lib/jxl/dct_util.h"
#include "lib/jxl/dec_transforms-inl.h"
#include "lib/jxl/enc_aux_out.h"
#include "lib/jxl/enc_cache.h"
#include "lib/jxl/enc_params.h"
#include "lib/jxl/enc_transforms-inl.h"
#include "lib/jxl/image.h"
#include "lib/jxl/quantizer-inl.h"
#include "lib/jxl/quantizer.h"
#include "lib/jxl/simd_util.h"
HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {

// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Abs;
using hwy::HWY_NAMESPACE::Ge;
using hwy::HWY_NAMESPACE::IfThenElse;
using hwy::HWY_NAMESPACE::IfThenElseZero;
using hwy::HWY_NAMESPACE::MaskFromVec;
using hwy::HWY_NAMESPACE::Round;

// NOTE: caller takes care of extracting quant from rect of RawQuantField.
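// Quantizes one AC block with a per-quadrant dead zone: coefficients whose
// scaled magnitude falls below the matching `thresholds` entry are forced to
// zero; the remaining values are rounded to the nearest integer.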
void QuantizeBlockAC(const Quantizer& quantizer, const bool error_diffusion,
                     size_t c, float qm_multiplier, AcStrategyType quant_kind,
                     size_t xsize, size_t ysize, float* thresholds,
                     const float* JXL_RESTRICT block_in, const int32_t* quant,
                     int32_t* JXL_RESTRICT block_out) {
  const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
  float qac = quantizer.Scale() * (*quant);
  // Not SIMD-ified for now.
  if (c != 1 && xsize * ysize >= 4) {
    for (int i = 0; i < 4; ++i) {
      thresholds[i] -= 0.00744f * xsize * ysize;
      if (thresholds[i] < 0.5) {
        thresholds[i] = 0.5;
      }
    }
  }
  HWY_CAPPED(float, kBlockDim) df;
  HWY_CAPPED(int32_t, kBlockDim) di;
  HWY_CAPPED(uint32_t, kBlockDim) du;
  const auto quantv = Set(df, qac * qm_multiplier);
  for (size_t y = 0; y < ysize * kBlockDim; y++) {
    size_t yfix = static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2;
    const size_t off = y * kBlockDim * xsize;
    for (size_t x = 0; x < xsize * kBlockDim; x += Lanes(df)) {
      auto threshold = Zero(df);
      if (xsize == 1) {
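        // With xsize == 1 a vector can straddle the horizontal midpoint of
        // the block, so select the left/right-half threshold per lane with a
        // mask instead of a single Set().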
        HWY_ALIGN uint32_t kMask[kBlockDim] = {0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u};
        const auto mask = MaskFromVec(BitCast(df, Load(du, kMask + x)));
        threshold = IfThenElse(mask, Set(df, thresholds[yfix + 1]),
                               Set(df, thresholds[yfix]));
      } else {
        // Same for all lanes in the vector.
        threshold = Set(
            df,
            thresholds[yfix + static_cast<size_t>(x >= xsize * kBlockDim / 2)]);
      }
      const auto q = Mul(Load(df, qm + off + x), quantv);
      const auto in = Load(df, block_in + off + x);
      const auto val = Mul(q, in);
      const auto nzero_mask = Ge(Abs(val), threshold);
      const auto v = ConvertTo(di, IfThenElseZero(nzero_mask, Round(val)));
      Store(v, di, block_out + off + x);
    }
  }
}

void AdjustQuantBlockAC(const Quantizer& quantizer, size_t c,
                        float qm_multiplier, AcStrategyType quant_kind,
                        size_t xsize, size_t ysize, float* thresholds,
                        const float* JXL_RESTRICT block_in, int32_t* quant) {
  // No quantization adjusting for these small blocks.
  // Quantization adjusting attempts to fix some known issues with larger
  // blocks, and the 8x8 blockiness that emerges from the 8x8 DCT when there
  // are not many non-zeros.
  constexpr size_t kPartialBlockKinds =
      (1 << static_cast<size_t>(AcStrategyType::IDENTITY)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT2X2)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT4X4)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT4X8)) |
      (1 << static_cast<size_t>(AcStrategyType::DCT8X4)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV0)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV1)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV2)) |
      (1 << static_cast<size_t>(AcStrategyType::AFV3));
  if ((1 << static_cast<size_t>(quant_kind)) & kPartialBlockKinds) {
    return;
  }

  const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
  float qac = quantizer.Scale() * (*quant);
  if (xsize > 1 || ysize > 1) {
    for (int i = 0; i < 4; ++i) {
      thresholds[i] -= Clamp1(0.003f * xsize * ysize, 0.f, 0.08f);
      if (thresholds[i] < 0.54) {
        thresholds[i] = 0.54;
      }
    }
  }
  float sum_of_highest_freq_row_and_column = 0;
  float sum_of_error = 0;
  float sum_of_vals = 0;
  float hfNonZeros[4] = {};
  float hfMaxError[4] = {};

  for (size_t y = 0; y < ysize * kBlockDim; y++) {
    for (size_t x = 0; x < xsize * kBlockDim; x++) {
      const size_t pos = y * kBlockDim * xsize + x;
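      // The top-left xsize*ysize corner holds the lowest-frequency
      // coefficients; those are coded through the DC path, so skip them here.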
      if (x < xsize && y < ysize) {
        continue;
      }
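      // Quadrant index: (bottom half ? 2 : 0) + (right half ? 1 : 0), i.e.
      // 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.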
      const size_t hfix = (static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2 +
                           static_cast<size_t>(x >= xsize * kBlockDim / 2));
      const float val = block_in[pos] * (qm[pos] * qac * qm_multiplier);
      const float v = (std::abs(val) < thresholds[hfix]) ? 0 : rintf(val);
      const float error = std::abs(val - v);
      sum_of_error += error;
      sum_of_vals += std::abs(v);
      if (c == 1 && v == 0) {
        if (hfMaxError[hfix] < error) {
          hfMaxError[hfix] = error;
        }
      }
      if (v != 0.0f) {
        hfNonZeros[hfix] += std::abs(v);
        bool in_corner = y >= 7 * ysize && x >= 7 * xsize;
        bool on_border =
            y == ysize * kBlockDim - 1 || x == xsize * kBlockDim - 1;
        bool in_larger_corner = x >= 4 * xsize && y >= 4 * ysize;
        if (in_corner || (on_border && in_larger_corner)) {
          sum_of_highest_freq_row_and_column += std::abs(val);
        }
      }
    }
  }
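  // Sparse Y blocks: when a high-frequency quadrant quantizes entirely to
  // zero but leaves a large maximum rounding error, raise the quant index by
  // one and widen that quadrant's dead zone proportionally.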
  if (c == 1 && sum_of_vals * 8 < xsize * ysize) {
    static const double kLimit[4] = {
        0.46,
        0.46,
        0.46,
        0.46,
    };
    static const double kMul[4] = {
        0.9999,
        0.9999,
        0.9999,
        0.9999,
    };
    const int32_t orig_quant = *quant;
    int32_t new_quant = *quant;
    for (int i = 1; i < 4; ++i) {
      if (hfNonZeros[i] == 0.0 && hfMaxError[i] > kLimit[i]) {
        new_quant = orig_quant + 1;
        break;
      }
    }
    *quant = new_quant;
    if (hfNonZeros[3] == 0.0 && hfMaxError[3] > kLimit[3]) {
      thresholds[3] = kMul[3] * hfMaxError[3] * new_quant / orig_quant;
    } else if ((hfNonZeros[1] == 0.0 && hfMaxError[1] > kLimit[1]) ||
               (hfNonZeros[2] == 0.0 && hfMaxError[2] > kLimit[2])) {
      thresholds[1] = kMul[1] * std::max(hfMaxError[1], hfMaxError[2]) *
                      new_quant / orig_quant;
      thresholds[2] = thresholds[1];
    } else if (hfNonZeros[0] == 0.0 && hfMaxError[0] > kLimit[0]) {
      thresholds[0] = kMul[0] * hfMaxError[0] * new_quant / orig_quant;
    }
  }
  // Heuristic for improving accuracy of high-frequency patterns
  // occurring in an environment with no medium-frequency masking
  // patterns.
  {
    float all =
        hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] + 1;
    float mul[3] = {70, 30, 60};
    if (mul[c] * sum_of_highest_freq_row_and_column >= all) {
      *quant += mul[c] * sum_of_highest_freq_row_and_column / all;
      if (*quant >= Quantizer::kQuantMax) {
        *quant = Quantizer::kQuantMax - 1;
      }
    }
  }
  if (quant_kind == AcStrategyType::DCT) {
    // If this 8x8 block is too flat, increase the adaptive quantization level
    // a bit to reduce visible block boundaries and requantize the block.
    if (hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] < 11) {
      *quant += 1;
      if (*quant >= Quantizer::kQuantMax) {
        *quant = Quantizer::kQuantMax - 1;
      }
    }
  }
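  // For large transforms (DCT16x16 and up), raise the quant index when the
  // accumulated quantization error is high relative to the coded magnitudes;
  // kMul1/kMul2 below are per-transform-size, per-channel tuning constants.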
  {
    static const double kMul1[4][3] = {
        {
            0.22080615753848404,
            0.45797479824262011,
            0.29859235095977965,
        },
        {
            0.70109486510286834,
            0.16185281305512639,
            0.14387691730035473,
        },
        {
            0.114985964456218638,
            0.44656840441027695,
            0.10587658215149048,
        },
        {
            0.46849665264409396,
            0.41239077937781954,
            0.088667407767185444,
        },
    };
    static const double kMul2[4][3] = {
        {
            0.27450281941822197,
            1.1255766549984996,
            0.98950459134128388,
        },
        {
            0.4652168675598285,
            0.40945807983455818,
            0.36581899811751367,
        },
        {
            0.28034972424715715,
            0.9182653201929738,
            1.5581531543057416,
        },
        {
            0.26873118114033728,
            0.68863712390392484,
            1.2082185408666786,
        },
    };
    static const double kQuantNormalizer = 2.2942708343284721;
    sum_of_error *= kQuantNormalizer;
    sum_of_vals *= kQuantNormalizer;
    if (quant_kind >= AcStrategyType::DCT16X16) {
      int ix = 3;
      if (quant_kind == AcStrategyType::DCT32X16 ||
          quant_kind == AcStrategyType::DCT16X32) {
        ix = 1;
      } else if (quant_kind == AcStrategyType::DCT16X16) {
        ix = 0;
      } else if (quant_kind == AcStrategyType::DCT32X32) {
        ix = 2;
      }
      int step =
          sum_of_error / (kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim +
                          kMul2[ix][c] * sum_of_vals);
      if (step >= 2) {
        step = 2;
      }
      if (step < 0) {
        step = 0;
      }
      if (sum_of_error > kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim +
                             kMul2[ix][c] * sum_of_vals) {
        *quant += step;
        if (*quant >= Quantizer::kQuantMax) {
          *quant = Quantizer::kQuantMax - 1;
        }
      }
    }
  }
  {
    // Reduce quant in highly active areas.
    int32_t div = (xsize * ysize);
    int32_t activity = (static_cast<int32_t>(hfNonZeros[0]) + div / 2) / div;
    int32_t orig_qp_limit = std::max(4, *quant / 2);
    for (int i = 1; i < 4; ++i) {
      activity = std::min(
          activity, (static_cast<int32_t>(hfNonZeros[i]) + div / 2) / div);
    }
    if (activity >= 15) {
      activity = 15;
    }
    int32_t qp = *quant - activity;
    if (c == 1) {
      for (int i = 1; i < 4; ++i) {
        thresholds[i] += 0.01 * activity;
      }
    }
    if (qp < orig_qp_limit) {
      qp = orig_qp_limit;
    }
    *quant = qp;
  }
}

// NOTE: caller takes care of extracting quant from rect of RawQuantField.
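// Quantizes the Y channel, then dequantizes it back into `inout` so that the
// X and B channels can be decorrelated against the same Y reconstruction the
// decoder will compute.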
void QuantizeRoundtripYBlockAC(PassesEncoderState* enc_state, const size_t size,
                               const Quantizer& quantizer,
                               const bool error_diffusion,
                               AcStrategyType quant_kind, size_t xsize,
                               size_t ysize, const float* JXL_RESTRICT biases,
                               int32_t* quant, float* JXL_RESTRICT inout,
                               int32_t* JXL_RESTRICT quantized) {
  float thres_y[4] = {0.58f, 0.64f, 0.64f, 0.64f};
  if (enc_state->cparams.speed_tier <= SpeedTier::kHare) {
    int32_t max_quant = 0;
    int quant_orig = *quant;
    float val[3] = {enc_state->x_qm_multiplier, 1.0f,
                    enc_state->b_qm_multiplier};
    for (int c : {1, 0, 2}) {
      float thres[4] = {0.58f, 0.64f, 0.64f, 0.64f};
      *quant = quant_orig;
      AdjustQuantBlockAC(quantizer, c, val[c], quant_kind, xsize, ysize,
                         &thres[0], inout + c * size, quant);
      // Dead zone adjustment
      if (c == 1) {
        for (int k = 0; k < 4; ++k) {
          thres_y[k] = thres[k];
        }
      }
      max_quant = std::max(*quant, max_quant);
    }
    *quant = max_quant;
  } else {
    thres_y[0] = 0.56;
    thres_y[1] = 0.62;
    thres_y[2] = 0.62;
    thres_y[3] = 0.62;
  }

  QuantizeBlockAC(quantizer, error_diffusion, 1, 1.0f, quant_kind, xsize, ysize,
                  &thres_y[0], inout + size, quant, quantized + size);

  const float* JXL_RESTRICT dequant_matrix =
      quantizer.DequantMatrix(quant_kind, 1);

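  // Dequantize Y in place: reconstructed value = quant-bias-adjusted integer
  // * dequant matrix entry * inverse AC quant step.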
  HWY_CAPPED(float, kDCTBlockSize) df;
  HWY_CAPPED(int32_t, kDCTBlockSize) di;
  const auto inv_qac = Set(df, quantizer.inv_quant_ac(*quant));
  for (size_t k = 0; k < kDCTBlockSize * xsize * ysize; k += Lanes(df)) {
    const auto quant = Load(di, quantized + size + k);
    const auto adj_quant = AdjustQuantBias(di, 1, quant, biases);
    const auto dequantm = Load(df, dequant_matrix + k);
    Store(Mul(Mul(adj_quant, dequantm), inv_qac), df, inout + size + k);
  }
}

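// Computes quantized AC coefficients and the DC image for one group: forward-
// transforms each block, roundtrip-quantizes Y, decorrelates X and B against
// the reconstructed Y, quantizes X and B, and splits the result into passes.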
Status ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
                           const Image3F& opsin, const Rect& rect,
                           Image3F* dc) {
  JxlMemoryManager* memory_manager = opsin.memory_manager();
  const Rect block_group_rect =
      enc_state->shared.frame_dim.BlockGroupRect(group_idx);
  const Rect cmap_rect(
      block_group_rect.x0() / kColorTileDimInBlocks,
      block_group_rect.y0() / kColorTileDimInBlocks,
      DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
      DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
  const Rect group_rect =
      enc_state->shared.frame_dim.GroupRect(group_idx).Translate(rect.x0(),
                                                                 rect.y0());

  const size_t xsize_blocks = block_group_rect.xsize();
  const size_t ysize_blocks = block_group_rect.ysize();

  const size_t dc_stride = static_cast<size_t>(dc->PixelsPerRow());
  const size_t opsin_stride = static_cast<size_t>(opsin.PixelsPerRow());

  ImageI& full_quant_field = enc_state->shared.raw_quant_field;
  const CompressParams& cparams = enc_state->cparams;

  const size_t dct_scratch_size =
      3 * (MaxVectorSize() / sizeof(float)) * AcStrategy::kMaxBlockDim;

  // TODO(veluca): consider strategies to reduce this memory.
  size_t mem_bytes = 3 * AcStrategy::kMaxCoeffArea * sizeof(int32_t);
  JXL_ASSIGN_OR_RETURN(auto mem,
                       AlignedMemory::Create(memory_manager, mem_bytes));
  size_t fmem_bytes =
      (5 * AcStrategy::kMaxCoeffArea + dct_scratch_size) * sizeof(float);
  JXL_ASSIGN_OR_RETURN(auto fmem,
                       AlignedMemory::Create(memory_manager, fmem_bytes));
  float* JXL_RESTRICT scratch_space =
      fmem.address<float>() + 3 * AcStrategy::kMaxCoeffArea;
  {
    // Only use error diffusion in Squirrel mode or slower.
    const bool error_diffusion = cparams.speed_tier <= SpeedTier::kSquirrel;
    constexpr HWY_CAPPED(float, kDCTBlockSize) d;

    int32_t* JXL_RESTRICT coeffs[3][kMaxNumPasses] = {};
    size_t num_passes = enc_state->progressive_splitter.GetNumPasses();
    JXL_ENSURE(num_passes > 0);
    for (size_t i = 0; i < num_passes; i++) {
      // TODO(veluca): 16-bit quantized coeffs are not implemented yet.
      JXL_ENSURE(enc_state->coeffs[i]->Type() == ACType::k32);
      for (size_t c = 0; c < 3; c++) {
        coeffs[c][i] = enc_state->coeffs[i]->PlaneRow(c, group_idx, 0).ptr32;
      }
    }

    HWY_ALIGN float* coeffs_in = fmem.address<float>();
    HWY_ALIGN int32_t* quantized = mem.address<int32_t>();

    for (size_t by = 0; by < ysize_blocks; ++by) {
      int32_t* JXL_RESTRICT row_quant_ac =
          block_group_rect.Row(&full_quant_field, by);
      size_t ty = by / kColorTileDimInBlocks;
      const int8_t* JXL_RESTRICT row_cmap[3] = {
          cmap_rect.ConstRow(enc_state->shared.cmap.ytox_map, ty),
          nullptr,
          cmap_rect.ConstRow(enc_state->shared.cmap.ytob_map, ty),
      };
      const float* JXL_RESTRICT opsin_rows[3] = {
          group_rect.ConstPlaneRow(opsin, 0, by * kBlockDim),
          group_rect.ConstPlaneRow(opsin, 1, by * kBlockDim),
          group_rect.ConstPlaneRow(opsin, 2, by * kBlockDim),
      };
      float* JXL_RESTRICT dc_rows[3] = {
          block_group_rect.PlaneRow(dc, 0, by),
          block_group_rect.PlaneRow(dc, 1, by),
          block_group_rect.PlaneRow(dc, 2, by),
      };
      AcStrategyRow ac_strategy_row =
          enc_state->shared.ac_strategy.ConstRow(block_group_rect, by);
      for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
           tx++) {
        const auto x_factor =
            Set(d, enc_state->shared.cmap.base().YtoXRatio(row_cmap[0][tx]));
        const auto b_factor =
            Set(d, enc_state->shared.cmap.base().YtoBRatio(row_cmap[2][tx]));
        for (size_t bx = tx * kColorTileDimInBlocks;
             bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks; ++bx) {
          const AcStrategy acs = ac_strategy_row[bx];
          if (!acs.IsFirstBlock()) continue;

          size_t xblocks = acs.covered_blocks_x();
          size_t yblocks = acs.covered_blocks_y();

          CoefficientLayout(&yblocks, &xblocks);

          size_t size = kDCTBlockSize * xblocks * yblocks;

          // DCT all three channels; roundtrip-quantize Y and set its DC.
          int32_t quant_ac = row_quant_ac[bx];
          for (size_t c : {0, 1, 2}) {
            TransformFromPixels(acs.Strategy(), opsin_rows[c] + bx * kBlockDim,
                                opsin_stride, coeffs_in + c * size,
                                scratch_space);
          }
          DCFromLowestFrequencies(acs.Strategy(), coeffs_in + size,
                                  dc_rows[1] + bx, dc_stride);

          QuantizeRoundtripYBlockAC(
              enc_state, size, enc_state->shared.quantizer, error_diffusion,
              acs.Strategy(), xblocks, yblocks, kDefaultQuantBias, &quant_ac,
              coeffs_in, quantized);

          // Unapply color correlation
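          // (NegMulAdd subtracts the per-tile x_factor/b_factor times the
          // roundtripped Y, mirroring the correlation the decoder adds back.)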
          for (size_t k = 0; k < size; k += Lanes(d)) {
            const auto in_x = Load(d, coeffs_in + k);
            const auto in_y = Load(d, coeffs_in + size + k);
            const auto in_b = Load(d, coeffs_in + 2 * size + k);
            const auto out_x = NegMulAdd(x_factor, in_y, in_x);
            const auto out_b = NegMulAdd(b_factor, in_y, in_b);
            Store(out_x, d, coeffs_in + k);
            Store(out_b, d, coeffs_in + 2 * size + k);
          }

          // Quantize X and B channels and set DC.
          for (size_t c : {0, 2}) {
            float thres[4] = {0.58f, 0.62f, 0.62f, 0.62f};
            QuantizeBlockAC(enc_state->shared.quantizer, error_diffusion, c,
                            c == 0 ? enc_state->x_qm_multiplier
                                   : enc_state->b_qm_multiplier,
                            acs.Strategy(), xblocks, yblocks, &thres[0],
                            coeffs_in + c * size, &quant_ac,
                            quantized + c * size);
            DCFromLowestFrequencies(acs.Strategy(), coeffs_in + c * size,
                                    dc_rows[c] + bx, dc_stride);
          }
          row_quant_ac[bx] = quant_ac;
          for (size_t c = 0; c < 3; c++) {
            enc_state->progressive_splitter.SplitACCoefficients(
                quantized + c * size, acs, bx, by, coeffs[c]);
            for (size_t p = 0; p < num_passes; p++) {
              coeffs[c][p] += size;
            }
          }
        }
      }
    }
  }
  return true;
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace jxl
HWY_AFTER_NAMESPACE();

#if HWY_ONCE
namespace jxl {
HWY_EXPORT(ComputeCoefficients);
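// Thin wrapper that dispatches to the best SIMD instantiation for this CPU.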
Status ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
                           const Image3F& opsin, const Rect& rect,
                           Image3F* dc) {
  return HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin,
                                                   rect, dc);
}

Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
                                        size_t histogram_idx,
                                        const PassesEncoderState& enc_state,
                                        BitWriter* writer, AuxOut* aux_out) {
  // Select which histogram to use among those of the current pass.
  const size_t num_histograms = enc_state.shared.num_histograms;
  // num_histograms is 0 only for lossless.
  JXL_ENSURE(num_histograms == 0 || histogram_idx < num_histograms);
  size_t histo_selector_bits = CeilLog2Nonzero(num_histograms);
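  // The histogram choice costs ceil(log2(num_histograms)) raw bits; with a
  // single histogram there is nothing to write.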

  if (histo_selector_bits != 0) {
    JXL_RETURN_IF_ERROR(
        writer->WithMaxBits(histo_selector_bits, LayerType::Ac, aux_out, [&] {
          writer->Write(histo_selector_bits, histogram_idx);
          return true;
        }));
  }
  size_t context_offset =
      histogram_idx * enc_state.shared.block_ctx_map.NumACContexts();
  JXL_RETURN_IF_ERROR(WriteTokens(
      enc_state.passes[pass_idx].ac_tokens[group_idx],
      enc_state.passes[pass_idx].codes, enc_state.passes[pass_idx].context_map,
      context_offset, writer, LayerType::AcTokens, aux_out));

  return true;
}

}  // namespace jxl
#endif  // HWY_ONCE