/src/libjxl/lib/jxl/enc_entropy_coder.cc

Source
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/enc_entropy_coder.h"

#include <cstddef>
#include <cstdint>
#include <vector>

#include "lib/jxl/base/rect.h"
#include "lib/jxl/enc_ans.h"
#include "lib/jxl/frame_dimensions.h"
#include "lib/jxl/frame_header.h"

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

#include "lib/jxl/ac_context.h"
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/bits.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/coeff_order.h"
#include "lib/jxl/coeff_order_fwd.h"
#include "lib/jxl/entropy_coder.h"
#include "lib/jxl/image.h"
#include "lib/jxl/pack_signed.h"

HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {

// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Add;
using hwy::HWY_NAMESPACE::AndNot;
using hwy::HWY_NAMESPACE::Eq;
using hwy::HWY_NAMESPACE::GetLane;

// Returns number of non-zero coefficients (but skip LLF).
// We cannot rely on block[] being all-zero bits, so first truncate to integer.
// Also writes the per-8x8 block nzeros starting at nzeros_pos.
int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
                            const AcStrategy acs, const size_t covered_blocks,
                            const size_t log2_covered_blocks,
                            const int32_t* JXL_RESTRICT block,
                            const size_t nzeros_stride,
                            int32_t* JXL_RESTRICT nzeros_pos) {
  const HWY_CAPPED(int32_t, kBlockDim) di;

  const auto zero = Zero(di);
  // Add FF..FF for every zero coefficient, negate to get #zeros.
  auto neg_sum_zero = zero;

  {
    // Mask sufficient for one row of coefficients.
    HWY_ALIGN const int32_t
        llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
            -1, -1, -1, -1};
    // First cx=1,2,4 elements are FF..FF, others 0.
    const int32_t* llf_mask_pos =
        llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;

    // Rows with LLF: mask out the LLF
    for (size_t y = 0; y < cy; y++) {
      for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
        const auto llf_mask = LoadU(di, llf_mask_pos + x);

        // LLF counts as zero so we don't include it in nzeros.
        const auto coef =
            AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));

        neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
      }
    }
  }

  // Remaining rows: no mask
  for (size_t y = cy; y < cy * kBlockDim; y++) {
    for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
      const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
    }
  }

  // We want area - sum_zero, add because neg_sum_zero is already negated.
  const int32_t nzeros = static_cast<int32_t>(cx * cy * kDCTBlockSize) +
                         GetLane(SumOfLanes(di, neg_sum_zero));

  const int32_t shifted_nzeros = static_cast<int32_t>(
      (nzeros + covered_blocks - 1) >> log2_covered_blocks);
  // Need non-canonicalized dimensions!
  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
      nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
    }
  }

  return nzeros;
}

// Specialization for 8x8, where only top-left is LLF/DC.
// About 1% overall speedup vs. NumNonZeroExceptLLF.
int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
                              int32_t* JXL_RESTRICT nzeros_pos) {
  const HWY_CAPPED(int32_t, kBlockDim) di;

  const auto zero = Zero(di);
  // Add FF..FF for every zero coefficient, negate to get #zeros.
  auto neg_sum_zero = zero;

  {
    // First row has DC, so mask
    const size_t y = 0;
    HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};

    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
      const auto dc_mask = Load(di, dc_mask_lanes + x);

      // DC counts as zero so we don't include it in nzeros.
      const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));

      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
    }
  }

  // Remaining rows: no mask
  for (size_t y = 1; y < kBlockDim; y++) {
    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
      const auto coef = Load(di, &block[y * kBlockDim + x]);
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
    }
  }

  // We want 64 - sum_zero, add because neg_sum_zero is already negated.
  const int32_t nzeros = static_cast<int32_t>(kDCTBlockSize) +
                         GetLane(SumOfLanes(di, neg_sum_zero));

  *nzeros_pos = nzeros;

  return nzeros;
}

// Turns the quantized AC coefficients of all blocks in `rect` into entropy
// coder tokens. `output` is cleared first and then filled.
//
// Blocks are visited in raster order; a multi-block transform is handled once,
// at its first block. For every such transform and every channel in the order
// 1, 0, 2 (channels whose subsampled position does not coincide with the
// current block are skipped), two kinds of tokens are emitted:
//   - the number of non-zero coefficients, LLF excluded;
//   - the coefficients after the LLF ones, in the order given by `orders`,
//     stopping as soon as all non-zero coefficients have been emitted.
// Coefficients of channel c are consumed sequentially from ac_rows[c].
//
// The number of nonzeros of each block is predicted from the top and the left
// blocks, with opportune scaling to take into account the number of blocks of
// each strategy.  The predicted number of nonzeros divided by two is used as a
// context; if this number is above 63, a specific context is used.  If the
// number of nonzeros of a strategy is above 63, it is written directly using a
// fixed number of bits (that depends on the size of the strategy).
//
// `tmp_num_nzeroes` is scratch storage for the per-block non-zero counts used
// by that prediction. Returns an error status (via JXL_ENSURE) if the counted
// non-zeros are not all consumed by the coefficient walk.
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
                            const Rect& rect,
                            const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
                            const AcStrategyImage& ac_strategy,
                            const YCbCrChromaSubsampling& cs,
                            Image3I* JXL_RESTRICT tmp_num_nzeroes,
                            std::vector<Token>* JXL_RESTRICT output,
                            const ImageB& qdc, const ImageI& qf,
                            const BlockCtxMap& block_ctx_map) {
  const size_t num_blocks_x = rect.xsize();
  const size_t num_blocks_y = rect.ysize();
  const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
  // Channel processing order inside each transform block.
  const int channel_order[3] = {1, 0, 2};

  output->clear();
  // TODO(user): update the estimate: usually less coefficients are used.
  output->reserve(3 * num_blocks_x * num_blocks_y * kDCTBlockSize);

  // Per-channel read position inside ac_rows[c].
  size_t ac_offset[3] = {0, 0, 0};

  for (size_t by = 0; by < num_blocks_y; ++by) {
    // Per-channel (possibly subsampled) row index and the matching rows of the
    // non-zero count image; the row above is null for the first row.
    size_t sby[3];
    int32_t* JXL_RESTRICT row_nzeros[3];
    const int32_t* JXL_RESTRICT row_nzeros_top[3];
    for (size_t c = 0; c < 3; ++c) {
      sby[c] = by >> cs.VShift(c);
      row_nzeros[c] = tmp_num_nzeroes->PlaneRow(c, sby[c]);
      row_nzeros_top[c] = nullptr;
      if (sby[c] != 0) {
        row_nzeros_top[c] = tmp_num_nzeroes->ConstPlaneRow(c, sby[c] - 1);
      }
    }

    const uint8_t* JXL_RESTRICT row_qdc =
        qdc.ConstRow(rect.y0() + by) + rect.x0();
    const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);

    for (size_t bx = 0; bx < num_blocks_x; ++bx) {
      AcStrategy acs = acs_row[bx];
      // Each transform is tokenized exactly once, from its first block.
      if (!acs.IsFirstBlock()) continue;

      size_t sbx[3];
      for (size_t c = 0; c < 3; ++c) {
        sbx[c] = bx >> cs.HShift(c);
      }

      size_t cx = acs.covered_blocks_x();
      size_t cy = acs.covered_blocks_y();
      const size_t covered_blocks = cx * cy;  // = #LLF coefficients
      const size_t log2_covered_blocks =
          Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
      const size_t num_coeffs = covered_blocks * kDCTBlockSize;

      CoefficientLayout(&cy, &cx);  // swap cx/cy to canonical order

      for (int c : channel_order) {
        // Skip channels for which this block is not the origin of a
        // subsampled block.
        if ((sbx[c] << cs.HShift(c)) != bx ||
            (sby[c] << cs.VShift(c)) != by) {
          continue;
        }

        const int32_t* JXL_RESTRICT block = ac_rows[c] + ac_offset[c];
        int32_t* JXL_RESTRICT nzeros_out = row_nzeros[c] + sbx[c];

        // Count the non-zeros (also recorded in the count image for later
        // prediction); single 8x8 blocks take the specialized path.
        int32_t nzeros;
        if (covered_blocks == 1) {
          nzeros = NumNonZero8x8ExceptDC(block, nzeros_out);
        } else {
          nzeros = NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
                                       log2_covered_blocks, block,
                                       nzeros_stride, nzeros_out);
        }

        int ord = kStrategyOrder[acs.RawStrategy()];
        const coeff_order_t* JXL_RESTRICT order =
            &orders[CoeffOrderOffset(ord, c)];

        int32_t predicted_nzeros =
            PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
        size_t block_ctx =
            block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
        const int32_t nzero_ctx =
            block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);

        // First token of the block: the non-zero count itself.
        output->emplace_back(nzero_ctx, nzeros);

        const size_t histo_offset =
            block_ctx_map.ZeroDensityContextsOffset(block_ctx);

        // `prev` tells the context model whether the previous coefficient was
        // non-zero; its starting value depends on how dense the block is.
        size_t prev = 1;
        if (nzeros > static_cast<ptrdiff_t>(num_coeffs / 16)) {
          prev = 0;
        }

        // Skip LLF: start right after the first covered_blocks entries, and
        // stop early once every non-zero has been written.
        for (size_t k = covered_blocks; nzeros != 0 && k < num_coeffs; ++k) {
          int32_t coeff = block[order[k]];
          size_t ctx =
              histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
                                                log2_covered_blocks, prev);
          uint32_t u_coeff = PackSigned(coeff);
          output->emplace_back(static_cast<uint32_t>(ctx), u_coeff);
          if (coeff != 0) {
            prev = 1;
            --nzeros;
          } else {
            prev = 0;
          }
        }
        JXL_ENSURE(nzeros == 0);
        ac_offset[c] += num_coeffs;
      }
    }
  }
  return true;
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace jxl
HWY_AFTER_NAMESPACE();

#if HWY_ONCE
namespace jxl {
// Makes the per-target jxl::HWY_NAMESPACE::TokenizeCoefficients variants
// compiled above available to HWY_DYNAMIC_DISPATCH below.
HWY_EXPORT(TokenizeCoefficients);
// Public, target-independent entry point (compiled once, see HWY_ONCE).
// Forwards every argument unchanged to the TokenizeCoefficients variant that
// Highway's dynamic dispatch selects, and returns that variant's Status.
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
                            const Rect& rect,
                            const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
                            const AcStrategyImage& ac_strategy,
                            const YCbCrChromaSubsampling& cs,
                            Image3I* JXL_RESTRICT tmp_num_nzeroes,
                            std::vector<Token>* JXL_RESTRICT output,
                            const ImageB& qdc, const ImageI& qf,
                            const BlockCtxMap& block_ctx_map) {
  return HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)(
      orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf,
      block_ctx_map);
}

}  // namespace jxl
#endif  // HWY_ONCE

Coverage Report

Created: 2026-02-14 07:09

Line	Count	Source
1		// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2		//
3		// Use of this source code is governed by a BSD-style
4		// license that can be found in the LICENSE file.
5
6		#include "lib/jxl/enc_entropy_coder.h"
7
8		#include <cstddef>
9		#include <cstdint>
10		#include <vector>
11
12		#include "lib/jxl/base/rect.h"
13		#include "lib/jxl/enc_ans.h"
14		#include "lib/jxl/frame_dimensions.h"
15		#include "lib/jxl/frame_header.h"
16
17		#undef HWY_TARGET_INCLUDE
18		#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc"
19		#include <hwy/foreach_target.h>
20		#include <hwy/highway.h>
21
22		#include "lib/jxl/ac_context.h"
23		#include "lib/jxl/ac_strategy.h"
24		#include "lib/jxl/base/bits.h"
25		#include "lib/jxl/base/compiler_specific.h"
26		#include "lib/jxl/base/status.h"
27		#include "lib/jxl/coeff_order.h"
28		#include "lib/jxl/coeff_order_fwd.h"
29		#include "lib/jxl/entropy_coder.h"
30		#include "lib/jxl/image.h"
31		#include "lib/jxl/pack_signed.h"
32
33		HWY_BEFORE_NAMESPACE();
34		namespace jxl {
35		namespace HWY_NAMESPACE {
36
37		// These templates are not found via ADL.
38		using hwy::HWY_NAMESPACE::Add;
39		using hwy::HWY_NAMESPACE::AndNot;
40		using hwy::HWY_NAMESPACE::Eq;
41		using hwy::HWY_NAMESPACE::GetLane;
42
43		// Returns number of non-zero coefficients (but skip LLF).
44		// We cannot rely on block[] being all-zero bits, so first truncate to integer.
45		// Also writes the per-8x8 block nzeros starting at nzeros_pos.
46		int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
47		const AcStrategy acs, const size_t covered_blocks,
48		const size_t log2_covered_blocks,
49		const int32_t* JXL_RESTRICT block,
50		const size_t nzeros_stride,
51	0	int32_t* JXL_RESTRICT nzeros_pos) {
52	0	const HWY_CAPPED(int32_t, kBlockDim) di;
53
54	0	const auto zero = Zero(di);
55		// Add FF..FF for every zero coefficient, negate to get #zeros.
56	0	auto neg_sum_zero = zero;
57
58	0	{
59		// Mask sufficient for one row of coefficients.
60	0	HWY_ALIGN const int32_t
61	0	llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
62	0	-1, -1, -1, -1};
63		// First cx=1,2,4 elements are FF..FF, others 0.
64	0	const int32_t* llf_mask_pos =
65	0	llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;
66
67		// Rows with LLF: mask out the LLF
68	0	for (size_t y = 0; y < cy; y++) {
69	0	for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
70	0	const auto llf_mask = LoadU(di, llf_mask_pos + x);
71
72		// LLF counts as zero so we don't include it in nzeros.
73	0	const auto coef =
74	0	AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));
75
76	0	neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
77	0	}
78	0	}
79	0	}
80
81		// Remaining rows: no mask
82	0	for (size_t y = cy; y < cy * kBlockDim; y++) {
83	0	for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
84	0	const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
85	0	neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
86	0	}
87	0	}
88
89		// We want area - sum_zero, add because neg_sum_zero is already negated.
90	0	const int32_t nzeros = static_cast<int32_t>(cx * cy * kDCTBlockSize) +
91	0	GetLane(SumOfLanes(di, neg_sum_zero));
92
93	0	const int32_t shifted_nzeros = static_cast<int32_t>(
94	0	(nzeros + covered_blocks - 1) >> log2_covered_blocks);
95		// Need non-canonicalized dimensions!
96	0	for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
97	0	for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
98	0	nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
99	0	}
100	0	}
101
102	0	return nzeros;
103	0	}
104
105		// Specialization for 8x8, where only top-left is LLF/DC.
106		// About 1% overall speedup vs. NumNonZeroExceptLLF.
107		int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
108	0	int32_t* JXL_RESTRICT nzeros_pos) {
109	0	const HWY_CAPPED(int32_t, kBlockDim) di;
110
111	0	const auto zero = Zero(di);
112		// Add FF..FF for every zero coefficient, negate to get #zeros.
113	0	auto neg_sum_zero = zero;
114
115	0	{
116		// First row has DC, so mask
117	0	const size_t y = 0;
118	0	HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};
119
120	0	for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
121	0	const auto dc_mask = Load(di, dc_mask_lanes + x);
122
123		// DC counts as zero so we don't include it in nzeros.
124	0	const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));
125
126	0	neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
127	0	}
128	0	}
129
130		// Remaining rows: no mask
131	0	for (size_t y = 1; y < kBlockDim; y++) {
132	0	for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
133	0	const auto coef = Load(di, &block[y * kBlockDim + x]);
134	0	neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
135	0	}
136	0	}
137
138		// We want 64 - sum_zero, add because neg_sum_zero is already negated.
139	0	const int32_t nzeros = static_cast<int32_t>(kDCTBlockSize) +
140	0	GetLane(SumOfLanes(di, neg_sum_zero));
141
142	0	*nzeros_pos = nzeros;
143
144	0	return nzeros;
145	0	}
146
147		// The number of nonzeros of each block is predicted from the top and the left
148		// blocks, with opportune scaling to take into account the number of blocks of
149		// each strategy. The predicted number of nonzeros divided by two is used as a
150		// context; if this number is above 63, a specific context is used. If the
151		// number of nonzeros of a strategy is above 63, it is written directly using a
152		// fixed number of bits (that depends on the size of the strategy).
153		Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
154		const Rect& rect,
155		const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
156		const AcStrategyImage& ac_strategy,
157		const YCbCrChromaSubsampling& cs,
158		Image3I* JXL_RESTRICT tmp_num_nzeroes,
159		std::vector<Token>* JXL_RESTRICT output,
160		const ImageB& qdc, const ImageI& qf,
161	0	const BlockCtxMap& block_ctx_map) {
162	0	const size_t xsize_blocks = rect.xsize();
163	0	const size_t ysize_blocks = rect.ysize();
164	0	output->clear();
165		// TODO(user): update the estimate: usually less coefficients are used.
166	0	output->reserve(3 * xsize_blocks * ysize_blocks * kDCTBlockSize);
167
168	0	size_t offset[3] = {};
169	0	const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
170	0	for (size_t by = 0; by < ysize_blocks; ++by) {
171	0	size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
172	0	by >> cs.VShift(2)};
173	0	int32_t* JXL_RESTRICT row_nzeros[3] = {
174	0	tmp_num_nzeroes->PlaneRow(0, sby[0]),
175	0	tmp_num_nzeroes->PlaneRow(1, sby[1]),
176	0	tmp_num_nzeroes->PlaneRow(2, sby[2]),
177	0	};
178	0	const int32_t* JXL_RESTRICT row_nzeros_top[3] = {
179	0	sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
180	0	sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
181	0	sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
182	0	};
183	0	const uint8_t* JXL_RESTRICT row_qdc =
184	0	qdc.ConstRow(rect.y0() + by) + rect.x0();
185	0	const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
186	0	AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
187	0	for (size_t bx = 0; bx < xsize_blocks; ++bx) {
188	0	AcStrategy acs = acs_row[bx];
189	0	if (!acs.IsFirstBlock()) continue;
190	0	size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
191	0	bx >> cs.HShift(2)};
192	0	size_t cx = acs.covered_blocks_x();
193	0	size_t cy = acs.covered_blocks_y();
194	0	const size_t covered_blocks = cx * cy; // = #LLF coefficients
195	0	const size_t log2_covered_blocks =
196	0	Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
197	0	const size_t size = covered_blocks * kDCTBlockSize;
198
199	0	CoefficientLayout(&cy, &cx); // swap cx/cy to canonical order
200
201	0	for (int c : {1, 0, 2}) {
202	0	if (sbx[c] << cs.HShift(c) != bx) continue;
203	0	if (sby[c] << cs.VShift(c) != by) continue;
204	0	const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];
205
206	0	int32_t nzeros =
207	0	(covered_blocks == 1)
208	0	? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
209	0	: NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
210	0	log2_covered_blocks, block, nzeros_stride,
211	0	row_nzeros[c] + sbx[c]);
212
213	0	int ord = kStrategyOrder[acs.RawStrategy()];
214	0	const coeff_order_t* JXL_RESTRICT order =
215	0	&orders[CoeffOrderOffset(ord, c)];
216
217	0	int32_t predicted_nzeros =
218	0	PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
219	0	size_t block_ctx =
220	0	block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
221	0	const int32_t nzero_ctx =
222	0	block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);
223
224	0	output->emplace_back(nzero_ctx, nzeros);
225	0	const size_t histo_offset =
226	0	block_ctx_map.ZeroDensityContextsOffset(block_ctx);
227		// Skip LLF.
228	0	size_t prev = (nzeros > static_cast<ptrdiff_t>(size / 16) ? 0 : 1);
229	0	for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
230	0	int32_t coeff = block[order[k]];
231	0	size_t ctx =
232	0	histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
233	0	log2_covered_blocks, prev);
234	0	uint32_t u_coeff = PackSigned(coeff);
235	0	output->emplace_back(static_cast<uint32_t>(ctx), u_coeff);
236	0	prev = (coeff != 0) ? 1 : 0;
237	0	nzeros -= prev;
238	0	}
239	0	JXL_ENSURE(nzeros == 0);
240	0	offset[c] += size;
241	0	}
242	0	}
243	0	}
244	0	return true;
245	0	}
246
247		// NOLINTNEXTLINE(google-readability-namespace-comments)
248		} // namespace HWY_NAMESPACE
249		} // namespace jxl
250		HWY_AFTER_NAMESPACE();
251
252		#if HWY_ONCE
253		namespace jxl {
254		HWY_EXPORT(TokenizeCoefficients);
255		Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
256		const Rect& rect,
257		const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
258		const AcStrategyImage& ac_strategy,
259		const YCbCrChromaSubsampling& cs,
260		Image3I* JXL_RESTRICT tmp_num_nzeroes,
261		std::vector<Token>* JXL_RESTRICT output,
262		const ImageB& qdc, const ImageI& qf,
263	0	const BlockCtxMap& block_ctx_map) {
264	0	return HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)(
265	0	orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf,
266	0	block_ctx_map);
267	0	}
268
269		} // namespace jxl
270		#endif // HWY_ONCE