Coverage Report

Created: 2025-07-23 08:18

/src/libjxl/lib/jxl/enc_entropy_coder.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_entropy_coder.h"
7
8
#include <cstddef>
9
#include <cstdint>
10
#include <vector>
11
12
#include "lib/jxl/base/compiler_specific.h"  // ssize_t
13
#include "lib/jxl/base/rect.h"
14
#include "lib/jxl/enc_ans.h"
15
#include "lib/jxl/frame_dimensions.h"
16
#include "lib/jxl/frame_header.h"
17
18
#undef HWY_TARGET_INCLUDE
19
#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc"
20
#include <hwy/foreach_target.h>
21
#include <hwy/highway.h>
22
23
#include "lib/jxl/ac_context.h"
24
#include "lib/jxl/ac_strategy.h"
25
#include "lib/jxl/base/bits.h"
26
#include "lib/jxl/base/compiler_specific.h"
27
#include "lib/jxl/base/status.h"
28
#include "lib/jxl/coeff_order.h"
29
#include "lib/jxl/coeff_order_fwd.h"
30
#include "lib/jxl/entropy_coder.h"
31
#include "lib/jxl/image.h"
32
#include "lib/jxl/pack_signed.h"
33
34
HWY_BEFORE_NAMESPACE();
35
namespace jxl {
36
namespace HWY_NAMESPACE {
37
38
// These templates are not found via ADL.
39
using hwy::HWY_NAMESPACE::Add;
40
using hwy::HWY_NAMESPACE::AndNot;
41
using hwy::HWY_NAMESPACE::Eq;
42
using hwy::HWY_NAMESPACE::GetLane;
43
44
// Returns number of non-zero coefficients (but skip LLF).
45
// We cannot rely on block[] being all-zero bits, so first truncate to integer.
46
// Also writes the per-8x8 block nzeros starting at nzeros_pos.
47
int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
48
                            const AcStrategy acs, const size_t covered_blocks,
49
                            const size_t log2_covered_blocks,
50
                            const int32_t* JXL_RESTRICT block,
51
                            const size_t nzeros_stride,
52
843k
                            int32_t* JXL_RESTRICT nzeros_pos) {
53
843k
  const HWY_CAPPED(int32_t, kBlockDim) di;
54
55
843k
  const auto zero = Zero(di);
56
  // Add FF..FF for every zero coefficient, negate to get #zeros.
57
843k
  auto neg_sum_zero = zero;
58
59
843k
  {
60
    // Mask sufficient for one row of coefficients.
61
843k
    HWY_ALIGN const int32_t
62
843k
        llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
63
843k
            -1, -1, -1, -1};
64
    // First cx=1,2,4 elements are FF..FF, others 0.
65
843k
    const int32_t* llf_mask_pos =
66
843k
        llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;
67
68
    // Rows with LLF: mask out the LLF
69
2.23M
    for (size_t y = 0; y < cy; y++) {
70
6.15M
      for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
71
4.76M
        const auto llf_mask = LoadU(di, llf_mask_pos + x);
72
73
        // LLF counts as zero so we don't include it in nzeros.
74
4.76M
        const auto coef =
75
4.76M
            AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));
76
77
4.76M
        neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
78
4.76M
      }
79
1.38M
    }
80
843k
  }
81
82
  // Remaining rows: no mask
83
10.5M
  for (size_t y = cy; y < cy * kBlockDim; y++) {
84
43.0M
    for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
85
33.3M
      const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
86
33.3M
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
87
33.3M
    }
88
9.72M
  }
89
90
  // We want area - sum_zero, add because neg_sum_zero is already negated.
91
843k
  const int32_t nzeros = static_cast<int32_t>(cx * cy * kDCTBlockSize) +
92
843k
                         GetLane(SumOfLanes(di, neg_sum_zero));
93
94
843k
  const int32_t shifted_nzeros = static_cast<int32_t>(
95
843k
      (nzeros + covered_blocks - 1) >> log2_covered_blocks);
96
  // Need non-canonicalized dimensions!
97
2.57M
  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
98
6.49M
    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
99
4.76M
      nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
100
4.76M
    }
101
1.72M
  }
102
103
843k
  return nzeros;
104
843k
}
Unexecuted instantiation: jxl::N_SSE4::NumNonZeroExceptLLF(unsigned long, unsigned long, jxl::AcStrategy, unsigned long, unsigned long, int const*, unsigned long, int*)
jxl::N_AVX2::NumNonZeroExceptLLF(unsigned long, unsigned long, jxl::AcStrategy, unsigned long, unsigned long, int const*, unsigned long, int*)
Line
Count
Source
52
843k
                            int32_t* JXL_RESTRICT nzeros_pos) {
53
843k
  const HWY_CAPPED(int32_t, kBlockDim) di;
54
55
843k
  const auto zero = Zero(di);
56
  // Add FF..FF for every zero coefficient, negate to get #zeros.
57
843k
  auto neg_sum_zero = zero;
58
59
843k
  {
60
    // Mask sufficient for one row of coefficients.
61
843k
    HWY_ALIGN const int32_t
62
843k
        llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
63
843k
            -1, -1, -1, -1};
64
    // First cx=1,2,4 elements are FF..FF, others 0.
65
843k
    const int32_t* llf_mask_pos =
66
843k
        llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;
67
68
    // Rows with LLF: mask out the LLF
69
2.23M
    for (size_t y = 0; y < cy; y++) {
70
6.15M
      for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
71
4.76M
        const auto llf_mask = LoadU(di, llf_mask_pos + x);
72
73
        // LLF counts as zero so we don't include it in nzeros.
74
4.76M
        const auto coef =
75
4.76M
            AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));
76
77
4.76M
        neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
78
4.76M
      }
79
1.38M
    }
80
843k
  }
81
82
  // Remaining rows: no mask
83
10.5M
  for (size_t y = cy; y < cy * kBlockDim; y++) {
84
43.0M
    for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
85
33.3M
      const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
86
33.3M
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
87
33.3M
    }
88
9.72M
  }
89
90
  // We want area - sum_zero, add because neg_sum_zero is already negated.
91
843k
  const int32_t nzeros = static_cast<int32_t>(cx * cy * kDCTBlockSize) +
92
843k
                         GetLane(SumOfLanes(di, neg_sum_zero));
93
94
843k
  const int32_t shifted_nzeros = static_cast<int32_t>(
95
843k
      (nzeros + covered_blocks - 1) >> log2_covered_blocks);
96
  // Need non-canonicalized dimensions!
97
2.57M
  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
98
6.49M
    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
99
4.76M
      nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
100
4.76M
    }
101
1.72M
  }
102
103
843k
  return nzeros;
104
843k
}
Unexecuted instantiation: jxl::N_AVX3::NumNonZeroExceptLLF(unsigned long, unsigned long, jxl::AcStrategy, unsigned long, unsigned long, int const*, unsigned long, int*)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::NumNonZeroExceptLLF(unsigned long, unsigned long, jxl::AcStrategy, unsigned long, unsigned long, int const*, unsigned long, int*)
Unexecuted instantiation: jxl::N_AVX3_SPR::NumNonZeroExceptLLF(unsigned long, unsigned long, jxl::AcStrategy, unsigned long, unsigned long, int const*, unsigned long, int*)
Unexecuted instantiation: jxl::N_SSE2::NumNonZeroExceptLLF(unsigned long, unsigned long, jxl::AcStrategy, unsigned long, unsigned long, int const*, unsigned long, int*)
105
106
// Specialization for 8x8, where only top-left is LLF/DC.
107
// About 1% overall speedup vs. NumNonZeroExceptLLF.
108
int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
109
3.93M
                              int32_t* JXL_RESTRICT nzeros_pos) {
110
3.93M
  const HWY_CAPPED(int32_t, kBlockDim) di;
111
112
3.93M
  const auto zero = Zero(di);
113
  // Add FF..FF for every zero coefficient, negate to get #zeros.
114
3.93M
  auto neg_sum_zero = zero;
115
116
3.93M
  {
117
    // First row has DC, so mask
118
3.93M
    const size_t y = 0;
119
3.93M
    HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};
120
121
7.86M
    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
122
3.93M
      const auto dc_mask = Load(di, dc_mask_lanes + x);
123
124
      // DC counts as zero so we don't include it in nzeros.
125
3.93M
      const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));
126
127
3.93M
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
128
3.93M
    }
129
3.93M
  }
130
131
  // Remaining rows: no mask
132
31.4M
  for (size_t y = 1; y < kBlockDim; y++) {
133
55.0M
    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
134
27.5M
      const auto coef = Load(di, &block[y * kBlockDim + x]);
135
27.5M
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
136
27.5M
    }
137
27.5M
  }
138
139
  // We want 64 - sum_zero, add because neg_sum_zero is already negated.
140
3.93M
  const int32_t nzeros = static_cast<int32_t>(kDCTBlockSize) +
141
3.93M
                         GetLane(SumOfLanes(di, neg_sum_zero));
142
143
3.93M
  *nzeros_pos = nzeros;
144
145
3.93M
  return nzeros;
146
3.93M
}
Unexecuted instantiation: jxl::N_SSE4::NumNonZero8x8ExceptDC(int const*, int*)
jxl::N_AVX2::NumNonZero8x8ExceptDC(int const*, int*)
Line
Count
Source
109
3.93M
                              int32_t* JXL_RESTRICT nzeros_pos) {
110
3.93M
  const HWY_CAPPED(int32_t, kBlockDim) di;
111
112
3.93M
  const auto zero = Zero(di);
113
  // Add FF..FF for every zero coefficient, negate to get #zeros.
114
3.93M
  auto neg_sum_zero = zero;
115
116
3.93M
  {
117
    // First row has DC, so mask
118
3.93M
    const size_t y = 0;
119
3.93M
    HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};
120
121
7.86M
    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
122
3.93M
      const auto dc_mask = Load(di, dc_mask_lanes + x);
123
124
      // DC counts as zero so we don't include it in nzeros.
125
3.93M
      const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));
126
127
3.93M
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
128
3.93M
    }
129
3.93M
  }
130
131
  // Remaining rows: no mask
132
31.4M
  for (size_t y = 1; y < kBlockDim; y++) {
133
55.0M
    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
134
27.5M
      const auto coef = Load(di, &block[y * kBlockDim + x]);
135
27.5M
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
136
27.5M
    }
137
27.5M
  }
138
139
  // We want 64 - sum_zero, add because neg_sum_zero is already negated.
140
3.93M
  const int32_t nzeros = static_cast<int32_t>(kDCTBlockSize) +
141
3.93M
                         GetLane(SumOfLanes(di, neg_sum_zero));
142
143
3.93M
  *nzeros_pos = nzeros;
144
145
3.93M
  return nzeros;
146
3.93M
}
Unexecuted instantiation: jxl::N_AVX3::NumNonZero8x8ExceptDC(int const*, int*)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::NumNonZero8x8ExceptDC(int const*, int*)
Unexecuted instantiation: jxl::N_AVX3_SPR::NumNonZero8x8ExceptDC(int const*, int*)
Unexecuted instantiation: jxl::N_SSE2::NumNonZero8x8ExceptDC(int const*, int*)
147
148
// The number of nonzeros of each block is predicted from the top and the left
149
// blocks, with opportune scaling to take into account the number of blocks of
150
// each strategy.  The predicted number of nonzeros divided by two is used as a
151
// context; if this number is above 63, a specific context is used.  If the
152
// number of nonzeros of a strategy is above 63, it is written directly using a
153
// fixed number of bits (that depends on the size of the strategy).
154
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
155
                            const Rect& rect,
156
                            const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
157
                            const AcStrategyImage& ac_strategy,
158
                            const YCbCrChromaSubsampling& cs,
159
                            Image3I* JXL_RESTRICT tmp_num_nzeroes,
160
                            std::vector<Token>* JXL_RESTRICT output,
161
                            const ImageB& qdc, const ImageI& qf,
162
5.26k
                            const BlockCtxMap& block_ctx_map) {
163
5.26k
  const size_t xsize_blocks = rect.xsize();
164
5.26k
  const size_t ysize_blocks = rect.ysize();
165
5.26k
  output->clear();
166
  // TODO(user): update the estimate: usually less coefficients are used.
167
5.26k
  output->reserve(3 * xsize_blocks * ysize_blocks * kDCTBlockSize);
168
169
5.26k
  size_t offset[3] = {};
170
5.26k
  const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
171
119k
  for (size_t by = 0; by < ysize_blocks; ++by) {
172
113k
    size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
173
113k
                     by >> cs.VShift(2)};
174
113k
    int32_t* JXL_RESTRICT row_nzeros[3] = {
175
113k
        tmp_num_nzeroes->PlaneRow(0, sby[0]),
176
113k
        tmp_num_nzeroes->PlaneRow(1, sby[1]),
177
113k
        tmp_num_nzeroes->PlaneRow(2, sby[2]),
178
113k
    };
179
113k
    const int32_t* JXL_RESTRICT row_nzeros_top[3] = {
180
113k
        sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
181
113k
        sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
182
113k
        sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
183
113k
    };
184
113k
    const uint8_t* JXL_RESTRICT row_qdc =
185
113k
        qdc.ConstRow(rect.y0() + by) + rect.x0();
186
113k
    const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
187
113k
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
188
3.01M
    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
189
2.89M
      AcStrategy acs = acs_row[bx];
190
2.89M
      if (!acs.IsFirstBlock()) continue;
191
1.59M
      size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
192
1.59M
                       bx >> cs.HShift(2)};
193
1.59M
      size_t cx = acs.covered_blocks_x();
194
1.59M
      size_t cy = acs.covered_blocks_y();
195
1.59M
      const size_t covered_blocks = cx * cy;  // = #LLF coefficients
196
1.59M
      const size_t log2_covered_blocks =
197
1.59M
          Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
198
1.59M
      const size_t size = covered_blocks * kDCTBlockSize;
199
200
1.59M
      CoefficientLayout(&cy, &cx);  // swap cx/cy to canonical order
201
202
4.77M
      for (int c : {1, 0, 2}) {
203
4.77M
        if (sbx[c] << cs.HShift(c) != bx) continue;
204
4.77M
        if (sby[c] << cs.VShift(c) != by) continue;
205
4.77M
        const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];
206
207
4.77M
        int32_t nzeros =
208
4.77M
            (covered_blocks == 1)
209
4.77M
                ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
210
4.77M
                : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
211
843k
                                      log2_covered_blocks, block, nzeros_stride,
212
843k
                                      row_nzeros[c] + sbx[c]);
213
214
4.77M
        int ord = kStrategyOrder[acs.RawStrategy()];
215
4.77M
        const coeff_order_t* JXL_RESTRICT order =
216
4.77M
            &orders[CoeffOrderOffset(ord, c)];
217
218
4.77M
        int32_t predicted_nzeros =
219
4.77M
            PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
220
4.77M
        size_t block_ctx =
221
4.77M
            block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
222
4.77M
        const int32_t nzero_ctx =
223
4.77M
            block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);
224
225
4.77M
        output->emplace_back(nzero_ctx, nzeros);
226
4.77M
        const size_t histo_offset =
227
4.77M
            block_ctx_map.ZeroDensityContextsOffset(block_ctx);
228
        // Skip LLF.
229
4.77M
        size_t prev = (nzeros > static_cast<ssize_t>(size / 16) ? 0 : 1);
230
150M
        for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
231
145M
          int32_t coeff = block[order[k]];
232
145M
          size_t ctx =
233
145M
              histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
234
145M
                                                log2_covered_blocks, prev);
235
145M
          uint32_t u_coeff = PackSigned(coeff);
236
145M
          output->emplace_back(ctx, u_coeff);
237
145M
          prev = (coeff != 0) ? 1 : 0;
238
145M
          nzeros -= prev;
239
145M
        }
240
4.77M
        JXL_ENSURE(nzeros == 0);
241
4.77M
        offset[c] += size;
242
4.77M
      }
243
1.59M
    }
244
113k
  }
245
5.26k
  return true;
246
5.26k
}
Unexecuted instantiation: jxl::N_SSE4::TokenizeCoefficients(unsigned int const*, jxl::RectT<unsigned long> const&, int const* restrict*, jxl::AcStrategyImage const&, jxl::YCbCrChromaSubsampling const&, jxl::Image3<int>*, std::__1::vector<jxl::Token, std::__1::allocator<jxl::Token> >*, jxl::Plane<unsigned char> const&, jxl::Plane<int> const&, jxl::BlockCtxMap const&)
jxl::N_AVX2::TokenizeCoefficients(unsigned int const*, jxl::RectT<unsigned long> const&, int const* restrict*, jxl::AcStrategyImage const&, jxl::YCbCrChromaSubsampling const&, jxl::Image3<int>*, std::__1::vector<jxl::Token, std::__1::allocator<jxl::Token> >*, jxl::Plane<unsigned char> const&, jxl::Plane<int> const&, jxl::BlockCtxMap const&)
Line
Count
Source
162
5.26k
                            const BlockCtxMap& block_ctx_map) {
163
5.26k
  const size_t xsize_blocks = rect.xsize();
164
5.26k
  const size_t ysize_blocks = rect.ysize();
165
5.26k
  output->clear();
166
  // TODO(user): update the estimate: usually less coefficients are used.
167
5.26k
  output->reserve(3 * xsize_blocks * ysize_blocks * kDCTBlockSize);
168
169
5.26k
  size_t offset[3] = {};
170
5.26k
  const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
171
119k
  for (size_t by = 0; by < ysize_blocks; ++by) {
172
113k
    size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
173
113k
                     by >> cs.VShift(2)};
174
113k
    int32_t* JXL_RESTRICT row_nzeros[3] = {
175
113k
        tmp_num_nzeroes->PlaneRow(0, sby[0]),
176
113k
        tmp_num_nzeroes->PlaneRow(1, sby[1]),
177
113k
        tmp_num_nzeroes->PlaneRow(2, sby[2]),
178
113k
    };
179
113k
    const int32_t* JXL_RESTRICT row_nzeros_top[3] = {
180
113k
        sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
181
113k
        sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
182
113k
        sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
183
113k
    };
184
113k
    const uint8_t* JXL_RESTRICT row_qdc =
185
113k
        qdc.ConstRow(rect.y0() + by) + rect.x0();
186
113k
    const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
187
113k
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
188
3.01M
    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
189
2.89M
      AcStrategy acs = acs_row[bx];
190
2.89M
      if (!acs.IsFirstBlock()) continue;
191
1.59M
      size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
192
1.59M
                       bx >> cs.HShift(2)};
193
1.59M
      size_t cx = acs.covered_blocks_x();
194
1.59M
      size_t cy = acs.covered_blocks_y();
195
1.59M
      const size_t covered_blocks = cx * cy;  // = #LLF coefficients
196
1.59M
      const size_t log2_covered_blocks =
197
1.59M
          Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
198
1.59M
      const size_t size = covered_blocks * kDCTBlockSize;
199
200
1.59M
      CoefficientLayout(&cy, &cx);  // swap cx/cy to canonical order
201
202
4.77M
      for (int c : {1, 0, 2}) {
203
4.77M
        if (sbx[c] << cs.HShift(c) != bx) continue;
204
4.77M
        if (sby[c] << cs.VShift(c) != by) continue;
205
4.77M
        const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];
206
207
4.77M
        int32_t nzeros =
208
4.77M
            (covered_blocks == 1)
209
4.77M
                ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
210
4.77M
                : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
211
843k
                                      log2_covered_blocks, block, nzeros_stride,
212
843k
                                      row_nzeros[c] + sbx[c]);
213
214
4.77M
        int ord = kStrategyOrder[acs.RawStrategy()];
215
4.77M
        const coeff_order_t* JXL_RESTRICT order =
216
4.77M
            &orders[CoeffOrderOffset(ord, c)];
217
218
4.77M
        int32_t predicted_nzeros =
219
4.77M
            PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
220
4.77M
        size_t block_ctx =
221
4.77M
            block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
222
4.77M
        const int32_t nzero_ctx =
223
4.77M
            block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);
224
225
4.77M
        output->emplace_back(nzero_ctx, nzeros);
226
4.77M
        const size_t histo_offset =
227
4.77M
            block_ctx_map.ZeroDensityContextsOffset(block_ctx);
228
        // Skip LLF.
229
4.77M
        size_t prev = (nzeros > static_cast<ssize_t>(size / 16) ? 0 : 1);
230
150M
        for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
231
145M
          int32_t coeff = block[order[k]];
232
145M
          size_t ctx =
233
145M
              histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
234
145M
                                                log2_covered_blocks, prev);
235
145M
          uint32_t u_coeff = PackSigned(coeff);
236
145M
          output->emplace_back(ctx, u_coeff);
237
145M
          prev = (coeff != 0) ? 1 : 0;
238
145M
          nzeros -= prev;
239
145M
        }
240
4.77M
        JXL_ENSURE(nzeros == 0);
241
4.77M
        offset[c] += size;
242
4.77M
      }
243
1.59M
    }
244
113k
  }
245
5.26k
  return true;
246
5.26k
}
Unexecuted instantiation: jxl::N_AVX3::TokenizeCoefficients(unsigned int const*, jxl::RectT<unsigned long> const&, int const* restrict*, jxl::AcStrategyImage const&, jxl::YCbCrChromaSubsampling const&, jxl::Image3<int>*, std::__1::vector<jxl::Token, std::__1::allocator<jxl::Token> >*, jxl::Plane<unsigned char> const&, jxl::Plane<int> const&, jxl::BlockCtxMap const&)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::TokenizeCoefficients(unsigned int const*, jxl::RectT<unsigned long> const&, int const* restrict*, jxl::AcStrategyImage const&, jxl::YCbCrChromaSubsampling const&, jxl::Image3<int>*, std::__1::vector<jxl::Token, std::__1::allocator<jxl::Token> >*, jxl::Plane<unsigned char> const&, jxl::Plane<int> const&, jxl::BlockCtxMap const&)
Unexecuted instantiation: jxl::N_AVX3_SPR::TokenizeCoefficients(unsigned int const*, jxl::RectT<unsigned long> const&, int const* restrict*, jxl::AcStrategyImage const&, jxl::YCbCrChromaSubsampling const&, jxl::Image3<int>*, std::__1::vector<jxl::Token, std::__1::allocator<jxl::Token> >*, jxl::Plane<unsigned char> const&, jxl::Plane<int> const&, jxl::BlockCtxMap const&)
Unexecuted instantiation: jxl::N_SSE2::TokenizeCoefficients(unsigned int const*, jxl::RectT<unsigned long> const&, int const* restrict*, jxl::AcStrategyImage const&, jxl::YCbCrChromaSubsampling const&, jxl::Image3<int>*, std::__1::vector<jxl::Token, std::__1::allocator<jxl::Token> >*, jxl::Plane<unsigned char> const&, jxl::Plane<int> const&, jxl::BlockCtxMap const&)
247
248
// NOLINTNEXTLINE(google-readability-namespace-comments)
249
}  // namespace HWY_NAMESPACE
250
}  // namespace jxl
251
HWY_AFTER_NAMESPACE();
252
253
#if HWY_ONCE
254
namespace jxl {
255
HWY_EXPORT(TokenizeCoefficients);
256
// Target-independent entry point: forwards all arguments unchanged to the
// per-target TokenizeCoefficients implementation chosen by Highway's
// HWY_DYNAMIC_DISPATCH and returns its status.
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
                            const Rect& rect,
                            const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
                            const AcStrategyImage& ac_strategy,
                            const YCbCrChromaSubsampling& cs,
                            Image3I* JXL_RESTRICT tmp_num_nzeroes,
                            std::vector<Token>* JXL_RESTRICT output,
                            const ImageB& qdc, const ImageI& qf,
                            const BlockCtxMap& block_ctx_map) {
  Status status = HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)(
      orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf,
      block_ctx_map);
  return status;
}
269
270
}  // namespace jxl
271
#endif  // HWY_ONCE