Coverage Report

Created: 2025-11-16 07:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_coeff_order.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <jxl/memory_manager.h>
7
8
#include <algorithm>
9
#include <cmath>
10
#include <cstddef>
11
#include <cstdint>
12
#include <cstring>
13
#include <limits>
14
#include <utility>
15
#include <vector>
16
17
#include "lib/jxl/ac_strategy.h"
18
#include "lib/jxl/base/compiler_specific.h"
19
#include "lib/jxl/base/rect.h"
20
#include "lib/jxl/base/status.h"
21
#include "lib/jxl/coeff_order.h"
22
#include "lib/jxl/coeff_order_fwd.h"
23
#include "lib/jxl/common.h"
24
#include "lib/jxl/dct_util.h"
25
#include "lib/jxl/enc_ans.h"
26
#include "lib/jxl/enc_ans_params.h"
27
#include "lib/jxl/enc_bit_writer.h"
28
#include "lib/jxl/frame_dimensions.h"
29
#include "lib/jxl/lehmer_code.h"
30
#include "lib/jxl/memory_manager_internal.h"
31
32
namespace jxl {
33
34
struct AuxOut;
35
enum class LayerType : uint8_t;
36
37
std::pair<uint32_t, uint32_t> ComputeUsedOrders(
38
    const SpeedTier speed, const AcStrategyImage& ac_strategy,
39
3.48k
    const Rect& rect) {
40
  // No coefficient reordering in Falcon or faster.
41
  // Only uses DCT8 = 0, so bitfield = 1.
42
3.48k
  if (speed >= SpeedTier::kFalcon) return {1, 1};
43
44
3.48k
  uint32_t ret = 0;
45
3.48k
  uint32_t ret_customize = 0;
46
3.48k
  size_t xsize_blocks = rect.xsize();
47
3.48k
  size_t ysize_blocks = rect.ysize();
48
  // TODO(veluca): precompute when doing DCT.
49
108k
  for (size_t by = 0; by < ysize_blocks; ++by) {
50
105k
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
51
5.04M
    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
52
4.94M
      int ord = kStrategyOrder[acs_row[bx].RawStrategy()];
53
      // Do not customize coefficient orders for blocks bigger than 32x32.
54
4.94M
      ret |= 1u << ord;
55
4.94M
      if (ord > 6) {
56
927k
        continue;
57
927k
      }
58
4.01M
      ret_customize |= 1u << ord;
59
4.01M
    }
60
105k
  }
61
  // Use default orders for small images.
62
3.48k
  if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return {ret, 0};
63
3.27k
  return {ret, ret_customize};
64
3.48k
}
65
66
Status ComputeCoeffOrder(SpeedTier speed, const ACImage& ac_image,
67
                         const AcStrategyImage& ac_strategy,
68
                         const FrameDimensions& frame_dim,
69
                         uint32_t& all_used_orders, uint32_t prev_used_acs,
70
                         uint32_t current_used_acs,
71
                         uint32_t current_used_orders,
72
3.48k
                         coeff_order_t* JXL_RESTRICT order) {
73
3.48k
  JxlMemoryManager* memory_manager = ac_strategy.memory_manager();
74
3.48k
  std::vector<int64_t> num_zeros(kCoeffOrderMaxSize);
75
  // If compressing at high speed and only using 8x8 DCTs, only consider a
76
  // subset of blocks.
77
3.48k
  double block_fraction = 1.0f;
78
  // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
79
  // encoding significantly less dense.
80
3.48k
  if (speed >= SpeedTier::kSquirrel && current_used_orders == 1) {
81
10
    block_fraction = 0.5f;
82
10
  }
83
  // No need to compute number of zero coefficients if all orders are the
84
  // default.
85
3.48k
  if (current_used_orders != 0) {
86
3.24k
    uint64_t threshold =
87
3.24k
        (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;
88
3.24k
    uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
89
3.24k
                     static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};
90
    // Xorshift128+ adapted from xorshift128+-inl.h
91
2.49M
    auto use_sample = [&]() {
92
2.49M
      auto s1 = s[0];
93
2.49M
      const auto s0 = s[1];
94
2.49M
      const auto bits = s1 + s0;  // b, c
95
2.49M
      s[0] = s0;
96
2.49M
      s1 ^= s1 << 23;
97
2.49M
      s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
98
2.49M
      s[1] = s1;
99
2.49M
      return (bits >> 32) <= threshold;
100
2.49M
    };
101
102
    // Count number of zero coefficients, separately for each DCT band.
103
    // TODO(veluca): precompute when doing DCT.
104
11.8k
    for (size_t group_index = 0; group_index < frame_dim.num_groups;
105
8.59k
         group_index++) {
106
8.59k
      const size_t gx = group_index % frame_dim.xsize_groups;
107
8.59k
      const size_t gy = group_index / frame_dim.xsize_groups;
108
8.59k
      const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
109
8.59k
                      kGroupDimInBlocks, kGroupDimInBlocks,
110
8.59k
                      frame_dim.xsize_blocks, frame_dim.ysize_blocks);
111
8.59k
      ConstACPtr rows[3];
112
8.59k
      ACType type = ac_image.Type();
113
34.3k
      for (size_t c = 0; c < 3; c++) {
114
25.7k
        rows[c] = ac_image.PlaneRow(c, group_index, 0);
115
25.7k
      }
116
8.59k
      size_t ac_offset = 0;
117
118
      // TODO(veluca): SIMDfy.
119
198k
      for (size_t by = 0; by < rect.ysize(); ++by) {
120
190k
        AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
121
5.08M
        for (size_t bx = 0; bx < rect.xsize(); ++bx) {
122
4.89M
          AcStrategy acs = acs_row[bx];
123
4.89M
          if (!acs.IsFirstBlock()) continue;
124
2.49M
          if (!use_sample()) continue;
125
2.49M
          size_t size = kDCTBlockSize << acs.log2_covered_blocks();
126
9.99M
          for (size_t c = 0; c < 3; ++c) {
127
7.49M
            const size_t order_offset =
128
7.49M
                CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
129
7.49M
            if (type == ACType::k16) {
130
0
              for (size_t k = 0; k < size; k++) {
131
0
                bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
132
0
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
133
0
              }
134
7.49M
            } else {
135
948M
              for (size_t k = 0; k < size; k++) {
136
940M
                bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
137
940M
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
138
940M
              }
139
7.49M
            }
140
            // Ensure LLFs are first in the order.
141
7.49M
            size_t cx = acs.covered_blocks_x();
142
7.49M
            size_t cy = acs.covered_blocks_y();
143
7.49M
            CoefficientLayout(&cy, &cx);
144
16.0M
            for (size_t iy = 0; iy < cy; iy++) {
145
23.2M
              for (size_t ix = 0; ix < cx; ix++) {
146
14.6M
                num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
147
14.6M
              }
148
8.55M
            }
149
7.49M
          }
150
2.49M
          ac_offset += size;
151
2.49M
        }
152
190k
      }
153
8.59k
    }
154
3.24k
  }
155
3.48k
  struct PosAndCount {
156
3.48k
    uint32_t pos;
157
    // Saving index breaks the ties for non-stable sort
158
3.48k
    uint64_t count_and_idx;
159
3.48k
  };
160
3.48k
  size_t mem_bytes = AcStrategy::kMaxCoeffArea * sizeof(PosAndCount);
161
3.48k
  JXL_ASSIGN_OR_RETURN(auto mem,
162
3.48k
                       AlignedMemory::Create(memory_manager, mem_bytes));
163
164
3.48k
  std::vector<coeff_order_t> natural_order_buffer;
165
166
3.48k
  uint16_t computed = 0;
167
97.6k
  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
168
94.1k
    uint8_t ord = kStrategyOrder[o];
169
94.1k
    if (computed & (1 << ord)) continue;
170
45.3k
    computed |= 1 << ord;
171
45.3k
    AcStrategy acs = AcStrategy::FromRawStrategy(o);
172
45.3k
    size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();
173
    // Expected maximal size is 256 x 256.
174
45.3k
    JXL_DASSERT(sz <= (1 << 16));
175
176
    // Do nothing for transforms that don't appear.
177
45.3k
    if ((1 << ord) & ~current_used_acs) continue;
178
179
    // Do nothing if we already committed to this custom order previously.
180
16.5k
    if ((1 << ord) & prev_used_acs) continue;
181
16.5k
    if ((1 << ord) & all_used_orders) continue;
182
183
16.5k
    if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz);
184
16.5k
    acs.ComputeNaturalCoeffOrder(natural_order_buffer.data());
185
186
    // Ensure natural coefficient order is not permuted if the order is
187
    // not transmitted.
188
16.5k
    if ((1 << ord) & ~current_used_orders) {
189
7.30k
      for (size_t c = 0; c < 3; c++) {
190
5.47k
        size_t offset = CoeffOrderOffset(ord, c);
191
5.47k
        JXL_ENSURE(CoeffOrderOffset(ord, c + 1) - offset == sz);
192
5.47k
        memcpy(&order[offset], natural_order_buffer.data(),
193
5.47k
               sz * sizeof(*order));
194
5.47k
      }
195
1.82k
      continue;
196
1.82k
    }
197
198
14.7k
    bool is_nondefault = false;
199
58.8k
    for (uint8_t c = 0; c < 3; c++) {
200
      // Apply zig-zag order.
201
44.1k
      PosAndCount* pos_and_val = mem.address<PosAndCount>();
202
44.1k
      size_t offset = CoeffOrderOffset(ord, c);
203
44.1k
      JXL_ENSURE(CoeffOrderOffset(ord, c + 1) - offset == sz);
204
44.1k
      float inv_sqrt_sz = 1.0f / std::sqrt(sz);
205
13.2M
      for (size_t i = 0; i < sz; ++i) {
206
13.2M
        size_t pos = natural_order_buffer[i];
207
13.2M
        pos_and_val[i].pos = pos;
208
        // We don't care for the exact number -> quantize number of zeros,
209
        // to get less permuted order.
210
13.2M
        uint64_t count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
211
        // Worst case: all dct8x8, all zeroes: count <= nb_pixels/64/8
212
        // nb_pixels is limited to 2^40 (Level 10 limit)
213
        // so count is limited to 2^31
214
13.2M
        JXL_DASSERT(count < (uint64_t{1} << 48));
215
13.2M
        pos_and_val[i].count_and_idx = (count << 16) | i;
216
13.2M
      }
217
218
      // Stable-sort -> elements with same number of zeros will preserve their
219
      // order.
220
38.5M
      auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
221
38.5M
        return a.count_and_idx < b.count_and_idx;
222
38.5M
      };
223
44.1k
      std::sort(pos_and_val, pos_and_val + sz, comparator);
224
225
      // Grab indices.
226
13.2M
      for (size_t i = 0; i < sz; ++i) {
227
13.2M
        order[offset + i] = pos_and_val[i].pos;
228
13.2M
        is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos;
229
13.2M
      }
230
44.1k
    }
231
14.7k
    if (!is_nondefault) {
232
6.66k
      current_used_orders &= ~(1 << ord);
233
6.66k
    }
234
14.7k
  }
235
3.48k
  all_used_orders |= current_used_orders;
236
3.48k
  return true;
237
3.48k
}
238
239
namespace {
240
241
Status TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
242
24.1k
                           size_t size, std::vector<Token>* tokens) {
243
24.1k
  std::vector<LehmerT> lehmer(size);
244
24.1k
  std::vector<uint32_t> temp(size + 1);
245
24.1k
  JXL_RETURN_IF_ERROR(
246
24.1k
      ComputeLehmerCode(order, temp.data(), size, lehmer.data()));
247
24.1k
  size_t end = size;
248
3.31M
  while (end > skip && lehmer[end - 1] == 0) {
249
3.28M
    --end;
250
3.28M
  }
251
24.1k
  tokens->emplace_back(CoeffOrderContext(size),
252
24.1k
                       static_cast<uint32_t>(end - skip));
253
24.1k
  uint32_t last = 0;
254
1.03M
  for (size_t i = skip; i < end; ++i) {
255
1.01M
    tokens->emplace_back(CoeffOrderContext(last), lehmer[i]);
256
1.01M
    last = lehmer[i];
257
1.01M
  }
258
24.1k
  return true;
259
24.1k
}
260
261
}  // namespace
262
263
Status EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
264
                         size_t size, BitWriter* writer, LayerType layer,
265
0
                         AuxOut* aux_out) {
266
0
  JxlMemoryManager* memory_manager = writer->memory_manager();
267
0
  std::vector<std::vector<Token>> tokens(1);
268
0
  JXL_RETURN_IF_ERROR(TokenizePermutation(order, skip, size, tokens.data()));
269
0
  EntropyEncodingData codes;
270
0
  JXL_ASSIGN_OR_RETURN(
271
0
      size_t cost, BuildAndEncodeHistograms(memory_manager, HistogramParams(),
272
0
                                            kPermutationContexts, tokens,
273
0
                                            &codes, writer, layer, aux_out));
274
0
  (void)cost;
275
0
  JXL_RETURN_IF_ERROR(WriteTokens(tokens[0], codes, 0, writer, layer, aux_out));
276
0
  return true;
277
0
}
278
279
namespace {
280
Status EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
281
                        std::vector<Token>* tokens, coeff_order_t* order_zigzag,
282
24.1k
                        std::vector<coeff_order_t>& natural_order_lut) {
283
24.1k
  const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
284
24.1k
  const size_t size = kDCTBlockSize * llf;
285
4.39M
  for (size_t i = 0; i < size; ++i) {
286
4.37M
    order_zigzag[i] = natural_order_lut[order[i]];
287
4.37M
  }
288
24.1k
  JXL_RETURN_IF_ERROR(TokenizePermutation(order_zigzag, llf, size, tokens));
289
24.1k
  return true;
290
24.1k
}
291
}  // namespace
292
293
Status EncodeCoeffOrders(uint16_t used_orders,
294
                         const coeff_order_t* JXL_RESTRICT order,
295
                         BitWriter* writer, LayerType layer,
296
3.48k
                         AuxOut* JXL_RESTRICT aux_out) {
297
3.48k
  JxlMemoryManager* memory_manager = writer->memory_manager();
298
3.48k
  size_t mem_bytes = AcStrategy::kMaxCoeffArea * sizeof(coeff_order_t);
299
3.48k
  JXL_ASSIGN_OR_RETURN(auto mem,
300
3.48k
                       AlignedMemory::Create(memory_manager, mem_bytes));
301
3.48k
  uint16_t computed = 0;
302
3.48k
  std::vector<std::vector<Token>> tokens(1);
303
3.48k
  std::vector<coeff_order_t> natural_order_lut;
304
97.6k
  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
305
94.1k
    uint8_t ord = kStrategyOrder[o];
306
94.1k
    if (computed & (1 << ord)) continue;
307
45.3k
    computed |= 1 << ord;
308
45.3k
    if ((used_orders & (1 << ord)) == 0) continue;
309
8.04k
    AcStrategy acs = AcStrategy::FromRawStrategy(o);
310
8.04k
    const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
311
8.04k
    const size_t size = kDCTBlockSize * llf;
312
8.04k
    if (natural_order_lut.size() < size) natural_order_lut.resize(size);
313
8.04k
    acs.ComputeNaturalCoeffOrderLut(natural_order_lut.data());
314
32.1k
    for (size_t c = 0; c < 3; c++) {
315
24.1k
      JXL_RETURN_IF_ERROR(
316
24.1k
          EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, tokens.data(),
317
24.1k
                           mem.address<coeff_order_t>(), natural_order_lut));
318
24.1k
    }
319
8.04k
  }
320
  // Do not write anything if no order is used.
321
3.48k
  if (used_orders != 0) {
322
2.67k
    EntropyEncodingData codes;
323
2.67k
    JXL_ASSIGN_OR_RETURN(
324
2.67k
        size_t cost, BuildAndEncodeHistograms(memory_manager, HistogramParams(),
325
2.67k
                                              kPermutationContexts, tokens,
326
2.67k
                                              &codes, writer, layer, aux_out));
327
2.67k
    (void)cost;
328
2.67k
    JXL_RETURN_IF_ERROR(
329
2.67k
        WriteTokens(tokens[0], codes, 0, writer, layer, aux_out));
330
2.67k
  }
331
3.48k
  return true;
332
3.48k
}
333
334
}  // namespace jxl