Coverage Report

Created: 2025-07-16 07:53

/src/libjxl/lib/jxl/enc_entropy_coder.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/enc_entropy_coder.h"
7
8
#include <cstddef>
9
#include <cstdint>
10
#include <vector>
11
12
#undef HWY_TARGET_INCLUDE
13
#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc"
14
#include <hwy/foreach_target.h>
15
#include <hwy/highway.h>
16
17
#include "lib/jxl/ac_context.h"
18
#include "lib/jxl/ac_strategy.h"
19
#include "lib/jxl/base/bits.h"
20
#include "lib/jxl/base/compiler_specific.h"
21
#include "lib/jxl/base/status.h"
22
#include "lib/jxl/coeff_order.h"
23
#include "lib/jxl/coeff_order_fwd.h"
24
#include "lib/jxl/entropy_coder.h"
25
#include "lib/jxl/image.h"
26
#include "lib/jxl/pack_signed.h"
27
28
HWY_BEFORE_NAMESPACE();
29
namespace jxl {
30
namespace HWY_NAMESPACE {
31
32
// These templates are not found via ADL.
33
using hwy::HWY_NAMESPACE::Add;
34
using hwy::HWY_NAMESPACE::AndNot;
35
using hwy::HWY_NAMESPACE::Eq;
36
using hwy::HWY_NAMESPACE::GetLane;
37
38
// Returns number of non-zero coefficients (but skip LLF).
39
// We cannot rely on block[] being all-zero bits, so first truncate to integer.
40
// Also writes the per-8x8 block nzeros starting at nzeros_pos.
41
int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
42
                            const AcStrategy acs, const size_t covered_blocks,
43
                            const size_t log2_covered_blocks,
44
                            const int32_t* JXL_RESTRICT block,
45
                            const size_t nzeros_stride,
46
0
                            int32_t* JXL_RESTRICT nzeros_pos) {
47
0
  const HWY_CAPPED(int32_t, kBlockDim) di;
48
49
0
  const auto zero = Zero(di);
50
  // Add FF..FF for every zero coefficient, negate to get #zeros.
51
0
  auto neg_sum_zero = zero;
52
53
0
  {
54
    // Mask sufficient for one row of coefficients.
55
0
    HWY_ALIGN const int32_t
56
0
        llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
57
0
            -1, -1, -1, -1};
58
    // First cx=1,2,4 elements are FF..FF, others 0.
59
0
    const int32_t* llf_mask_pos =
60
0
        llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;
61
62
    // Rows with LLF: mask out the LLF
63
0
    for (size_t y = 0; y < cy; y++) {
64
0
      for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
65
0
        const auto llf_mask = LoadU(di, llf_mask_pos + x);
66
67
        // LLF counts as zero so we don't include it in nzeros.
68
0
        const auto coef =
69
0
            AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));
70
71
0
        neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
72
0
      }
73
0
    }
74
0
  }
75
76
  // Remaining rows: no mask
77
0
  for (size_t y = cy; y < cy * kBlockDim; y++) {
78
0
    for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
79
0
      const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
80
0
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
81
0
    }
82
0
  }
83
84
  // We want area - sum_zero, add because neg_sum_zero is already negated.
85
0
  const int32_t nzeros = static_cast<int32_t>(cx * cy * kDCTBlockSize) +
86
0
                         GetLane(SumOfLanes(di, neg_sum_zero));
87
88
0
  const int32_t shifted_nzeros = static_cast<int32_t>(
89
0
      (nzeros + covered_blocks - 1) >> log2_covered_blocks);
90
  // Need non-canonicalized dimensions!
91
0
  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
92
0
    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
93
0
      nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
94
0
    }
95
0
  }
96
97
0
  return nzeros;
98
0
}
99
100
// Specialization for 8x8, where only top-left is LLF/DC.
101
// About 1% overall speedup vs. NumNonZeroExceptLLF.
102
int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
103
0
                              int32_t* JXL_RESTRICT nzeros_pos) {
104
0
  const HWY_CAPPED(int32_t, kBlockDim) di;
105
106
0
  const auto zero = Zero(di);
107
  // Add FF..FF for every zero coefficient, negate to get #zeros.
108
0
  auto neg_sum_zero = zero;
109
110
0
  {
111
    // First row has DC, so mask
112
0
    const size_t y = 0;
113
0
    HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};
114
115
0
    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
116
0
      const auto dc_mask = Load(di, dc_mask_lanes + x);
117
118
      // DC counts as zero so we don't include it in nzeros.
119
0
      const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));
120
121
0
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
122
0
    }
123
0
  }
124
125
  // Remaining rows: no mask
126
0
  for (size_t y = 1; y < kBlockDim; y++) {
127
0
    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
128
0
      const auto coef = Load(di, &block[y * kBlockDim + x]);
129
0
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
130
0
    }
131
0
  }
132
133
  // We want 64 - sum_zero, add because neg_sum_zero is already negated.
134
0
  const int32_t nzeros = static_cast<int32_t>(kDCTBlockSize) +
135
0
                         GetLane(SumOfLanes(di, neg_sum_zero));
136
137
0
  *nzeros_pos = nzeros;
138
139
0
  return nzeros;
140
0
}
141
142
// The number of nonzeros of each block is predicted from the top and the left
143
// blocks, with opportune scaling to take into account the number of blocks of
144
// each strategy.  The predicted number of nonzeros divided by two is used as a
145
// context; if this number is above 63, a specific context is used.  If the
146
// number of nonzeros of a strategy is above 63, it is written directly using a
147
// fixed number of bits (that depends on the size of the strategy).
148
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
149
                            const Rect& rect,
150
                            const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
151
                            const AcStrategyImage& ac_strategy,
152
                            const YCbCrChromaSubsampling& cs,
153
                            Image3I* JXL_RESTRICT tmp_num_nzeroes,
154
                            std::vector<Token>* JXL_RESTRICT output,
155
                            const ImageB& qdc, const ImageI& qf,
156
0
                            const BlockCtxMap& block_ctx_map) {
157
0
  const size_t xsize_blocks = rect.xsize();
158
0
  const size_t ysize_blocks = rect.ysize();
159
0
  output->clear();
160
  // TODO(user): update the estimate: usually less coefficients are used.
161
0
  output->reserve(3 * xsize_blocks * ysize_blocks * kDCTBlockSize);
162
163
0
  size_t offset[3] = {};
164
0
  const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
165
0
  for (size_t by = 0; by < ysize_blocks; ++by) {
166
0
    size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
167
0
                     by >> cs.VShift(2)};
168
0
    int32_t* JXL_RESTRICT row_nzeros[3] = {
169
0
        tmp_num_nzeroes->PlaneRow(0, sby[0]),
170
0
        tmp_num_nzeroes->PlaneRow(1, sby[1]),
171
0
        tmp_num_nzeroes->PlaneRow(2, sby[2]),
172
0
    };
173
0
    const int32_t* JXL_RESTRICT row_nzeros_top[3] = {
174
0
        sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
175
0
        sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
176
0
        sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
177
0
    };
178
0
    const uint8_t* JXL_RESTRICT row_qdc =
179
0
        qdc.ConstRow(rect.y0() + by) + rect.x0();
180
0
    const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
181
0
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
182
0
    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
183
0
      AcStrategy acs = acs_row[bx];
184
0
      if (!acs.IsFirstBlock()) continue;
185
0
      size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
186
0
                       bx >> cs.HShift(2)};
187
0
      size_t cx = acs.covered_blocks_x();
188
0
      size_t cy = acs.covered_blocks_y();
189
0
      const size_t covered_blocks = cx * cy;  // = #LLF coefficients
190
0
      const size_t log2_covered_blocks =
191
0
          Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
192
0
      const size_t size = covered_blocks * kDCTBlockSize;
193
194
0
      CoefficientLayout(&cy, &cx);  // swap cx/cy to canonical order
195
196
0
      for (int c : {1, 0, 2}) {
197
0
        if (sbx[c] << cs.HShift(c) != bx) continue;
198
0
        if (sby[c] << cs.VShift(c) != by) continue;
199
0
        const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];
200
201
0
        int32_t nzeros =
202
0
            (covered_blocks == 1)
203
0
                ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
204
0
                : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
205
0
                                      log2_covered_blocks, block, nzeros_stride,
206
0
                                      row_nzeros[c] + sbx[c]);
207
208
0
        int ord = kStrategyOrder[acs.RawStrategy()];
209
0
        const coeff_order_t* JXL_RESTRICT order =
210
0
            &orders[CoeffOrderOffset(ord, c)];
211
212
0
        int32_t predicted_nzeros =
213
0
            PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
214
0
        size_t block_ctx =
215
0
            block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
216
0
        const int32_t nzero_ctx =
217
0
            block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);
218
219
0
        output->emplace_back(nzero_ctx, nzeros);
220
0
        const size_t histo_offset =
221
0
            block_ctx_map.ZeroDensityContextsOffset(block_ctx);
222
        // Skip LLF.
223
0
        size_t prev = (nzeros > static_cast<ssize_t>(size / 16) ? 0 : 1);
224
0
        for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
225
0
          int32_t coeff = block[order[k]];
226
0
          size_t ctx =
227
0
              histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
228
0
                                                log2_covered_blocks, prev);
229
0
          uint32_t u_coeff = PackSigned(coeff);
230
0
          output->emplace_back(ctx, u_coeff);
231
0
          prev = (coeff != 0) ? 1 : 0;
232
0
          nzeros -= prev;
233
0
        }
234
0
        JXL_ENSURE(nzeros == 0);
235
0
        offset[c] += size;
236
0
      }
237
0
    }
238
0
  }
239
0
  return true;
240
0
}
241
242
// NOLINTNEXTLINE(google-readability-namespace-comments)
243
}  // namespace HWY_NAMESPACE
244
}  // namespace jxl
245
HWY_AFTER_NAMESPACE();
246
247
#if HWY_ONCE
248
namespace jxl {
249
HWY_EXPORT(TokenizeCoefficients);
250
// Public entry point: forwards all arguments unchanged to the
// TokenizeCoefficients implementation selected by HWY_DYNAMIC_DISPATCH and
// returns its Status.
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
                            const Rect& rect,
                            const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
                            const AcStrategyImage& ac_strategy,
                            const YCbCrChromaSubsampling& cs,
                            Image3I* JXL_RESTRICT tmp_num_nzeroes,
                            std::vector<Token>* JXL_RESTRICT output,
                            const ImageB& qdc, const ImageI& qf,
                            const BlockCtxMap& block_ctx_map) {
  const Status status = HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)(
      orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf,
      block_ctx_map);
  return status;
}
263
264
}  // namespace jxl
265
#endif  // HWY_ONCE