/src/libjxl/lib/jxl/enc_ans.cc
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/enc_ans.h" |
7 | | |
8 | | #include <jxl/memory_manager.h> |
9 | | #include <jxl/types.h> |
10 | | |
11 | | #include <algorithm> |
12 | | #include <array> |
13 | | #include <cmath> |
14 | | #include <cstddef> |
15 | | #include <cstdint> |
16 | | #include <limits> |
17 | | #include <utility> |
18 | | #include <vector> |
19 | | |
20 | | #include "lib/jxl/ans_common.h" |
21 | | #include "lib/jxl/ans_params.h" |
22 | | #include "lib/jxl/base/bits.h" |
23 | | #include "lib/jxl/base/common.h" |
24 | | #include "lib/jxl/base/compiler_specific.h" |
25 | | #include "lib/jxl/base/status.h" |
26 | | #include "lib/jxl/common.h" |
27 | | #include "lib/jxl/dec_ans.h" |
28 | | #include "lib/jxl/enc_ans_params.h" |
29 | | #include "lib/jxl/enc_ans_simd.h" |
30 | | #include "lib/jxl/enc_aux_out.h" |
31 | | #include "lib/jxl/enc_cluster.h" |
32 | | #include "lib/jxl/enc_context_map.h" |
33 | | #include "lib/jxl/enc_fields.h" |
34 | | #include "lib/jxl/enc_huffman.h" |
35 | | #include "lib/jxl/enc_lz77.h" |
36 | | #include "lib/jxl/enc_params.h" |
37 | | #include "lib/jxl/fields.h" |
38 | | #include "lib/jxl/memory_manager_internal.h" |
39 | | #include "lib/jxl/modular/options.h" |
40 | | #include "lib/jxl/simd_util.h" |
41 | | |
42 | | namespace jxl { |
43 | | |
44 | | namespace { |
45 | | |
// `ans_fuzzer_friendly_` is a compile-time constant in non-debug builds and an
// ordinary mutable variable when JXL_IS_DEBUG_BUILD is set.
// NOTE(review): presumably debug builds keep it mutable so it can be toggled
// at run time - confirm against the code that reads this flag.
#if JXL_IS_DEBUG_BUILD
bool ans_fuzzer_friendly_ = false;
#else
constexpr bool ans_fuzzer_friendly_ = false;
#endif

// Largest number of distinct symbols for which `ANSEncodingHistogram::Encode`
// emits the "small tree" form.
const int kMaxNumSymbolsForSmallCode = 2;
52 | | |
53 | | template <typename Writer> |
54 | 5.90M | void StoreVarLenUint8(size_t n, Writer* writer) { |
55 | 5.90M | JXL_DASSERT(n <= 255); |
56 | 5.90M | if (n == 0) { |
57 | 543k | writer->Write(1, 0); |
58 | 5.35M | } else { |
59 | 5.35M | writer->Write(1, 1); |
60 | 5.35M | size_t nbits = FloorLog2Nonzero(n); |
61 | 5.35M | writer->Write(3, nbits); |
62 | 5.35M | writer->Write(nbits, n - (1ULL << nbits)); |
63 | 5.35M | } |
64 | 5.90M | } enc_ans.cc:void jxl::(anonymous namespace)::StoreVarLenUint8<jxl::SizeWriter>(unsigned long, jxl::SizeWriter*) Line | Count | Source | 54 | 5.64M | void StoreVarLenUint8(size_t n, Writer* writer) { | 55 | 5.64M | JXL_DASSERT(n <= 255); | 56 | 5.64M | if (n == 0) { | 57 | 516k | writer->Write(1, 0); | 58 | 5.12M | } else { | 59 | 5.12M | writer->Write(1, 1); | 60 | 5.12M | size_t nbits = FloorLog2Nonzero(n); | 61 | 5.12M | writer->Write(3, nbits); | 62 | 5.12M | writer->Write(nbits, n - (1ULL << nbits)); | 63 | 5.12M | } | 64 | 5.64M | } |
enc_ans.cc:void jxl::(anonymous namespace)::StoreVarLenUint8<jxl::BitWriter>(unsigned long, jxl::BitWriter*) Line | Count | Source | 54 | 260k | void StoreVarLenUint8(size_t n, Writer* writer) { | 55 | 260k | JXL_DASSERT(n <= 255); | 56 | 260k | if (n == 0) { | 57 | 26.4k | writer->Write(1, 0); | 58 | 233k | } else { | 59 | 233k | writer->Write(1, 1); | 60 | 233k | size_t nbits = FloorLog2Nonzero(n); | 61 | 233k | writer->Write(3, nbits); | 62 | 233k | writer->Write(nbits, n - (1ULL << nbits)); | 63 | 233k | } | 64 | 260k | } |
|
65 | | |
66 | | template <typename Writer> |
67 | 27.5k | void StoreVarLenUint16(size_t n, Writer* writer) { |
68 | 27.5k | JXL_DASSERT(n <= 65535); |
69 | 27.5k | if (n == 0) { |
70 | 849 | writer->Write(1, 0); |
71 | 26.6k | } else { |
72 | 26.6k | writer->Write(1, 1); |
73 | 26.6k | size_t nbits = FloorLog2Nonzero(n); |
74 | 26.6k | writer->Write(4, nbits); |
75 | 26.6k | writer->Write(nbits, n - (1ULL << nbits)); |
76 | 26.6k | } |
77 | 27.5k | } enc_ans.cc:void jxl::(anonymous namespace)::StoreVarLenUint16<jxl::BitWriter>(unsigned long, jxl::BitWriter*) Line | Count | Source | 67 | 8.84k | void StoreVarLenUint16(size_t n, Writer* writer) { | 68 | 8.84k | JXL_DASSERT(n <= 65535); | 69 | 8.84k | if (n == 0) { | 70 | 849 | writer->Write(1, 0); | 71 | 7.99k | } else { | 72 | 7.99k | writer->Write(1, 1); | 73 | 7.99k | size_t nbits = FloorLog2Nonzero(n); | 74 | 7.99k | writer->Write(4, nbits); | 75 | 7.99k | writer->Write(nbits, n - (1ULL << nbits)); | 76 | 7.99k | } | 77 | 8.84k | } |
enc_ans.cc:void jxl::(anonymous namespace)::StoreVarLenUint16<jxl::SizeWriter>(unsigned long, jxl::SizeWriter*) Line | Count | Source | 67 | 18.6k | void StoreVarLenUint16(size_t n, Writer* writer) { | 68 | 18.6k | JXL_DASSERT(n <= 65535); | 69 | 18.6k | if (n == 0) { | 70 | 0 | writer->Write(1, 0); | 71 | 18.6k | } else { | 72 | 18.6k | writer->Write(1, 1); | 73 | 18.6k | size_t nbits = FloorLog2Nonzero(n); | 74 | 18.6k | writer->Write(4, nbits); | 75 | 18.6k | writer->Write(nbits, n - (1ULL << nbits)); | 76 | 18.6k | } | 77 | 18.6k | } |
|
78 | | |
79 | | class ANSEncodingHistogram { |
80 | | public: |
81 | 172k | const std::vector<ANSHistBin>& Counts() const { return counts_; } |
82 | 729k | float Cost() const { return cost_; } |
83 | | // The only way to construct valid histogram for ANS encoding |
84 | | static StatusOr<ANSEncodingHistogram> ComputeBest( |
85 | | const Histogram& histo, |
86 | 729k | HistogramParams::ANSHistogramStrategy ans_histogram_strategy) { |
87 | 729k | ANSEncodingHistogram result; |
88 | | |
89 | 729k | result.alphabet_size_ = histo.alphabet_size(); |
90 | 729k | if (result.alphabet_size_ > ANS_MAX_ALPHABET_SIZE) |
91 | 0 | return JXL_FAILURE("Too many entries in an ANS histogram"); |
92 | | |
93 | 729k | if (result.alphabet_size_ > 0) { |
94 | | // Flat code |
95 | 729k | result.method_ = 0; |
96 | 729k | result.num_symbols_ = result.alphabet_size_; |
97 | 729k | result.counts_ = CreateFlatHistogram(result.alphabet_size_, ANS_TAB_SIZE); |
98 | | // in this case length can be non-suitable for SIMD - fix it |
99 | 729k | result.counts_.resize(histo.counts.size()); |
100 | 729k | SizeWriter writer; |
101 | 729k | JXL_RETURN_IF_ERROR(result.Encode(&writer)); |
102 | 729k | result.cost_ = writer.size + EstimateDataBitsFlat(histo); |
103 | 729k | } else { |
104 | | // Empty histogram |
105 | 0 | result.method_ = 1; |
106 | 0 | result.num_symbols_ = 0; |
107 | 0 | result.cost_ = 3; |
108 | 0 | return result; |
109 | 0 | } |
110 | | |
111 | 729k | size_t symbol_count = 0; |
112 | 26.0M | for (size_t n = 0; n < result.alphabet_size_; ++n) { |
113 | 25.2M | if (histo.counts[n] > 0) { |
114 | 11.7M | if (symbol_count < kMaxNumSymbolsForSmallCode) { |
115 | 1.41M | result.symbols_[symbol_count] = n; |
116 | 1.41M | } |
117 | 11.7M | ++symbol_count; |
118 | 11.7M | } |
119 | 25.2M | } |
120 | 729k | result.num_symbols_ = symbol_count; |
121 | 729k | if (symbol_count == 1) { |
122 | | // Single-bin histogram |
123 | 43.0k | result.method_ = 1; |
124 | 43.0k | result.counts_ = histo.counts; |
125 | 43.0k | result.counts_[result.symbols_[0]] = ANS_TAB_SIZE; |
126 | 43.0k | SizeWriter writer; |
127 | 43.0k | JXL_RETURN_IF_ERROR(result.Encode(&writer)); |
128 | 43.0k | result.cost_ = writer.size; |
129 | 43.0k | return result; |
130 | 43.0k | } |
131 | | |
132 | | // Here min 2 symbols |
133 | 686k | ANSEncodingHistogram normalized = result; |
134 | 2.78M | auto try_shift = [&](uint32_t shift) -> Status { |
135 | | // `shift = 12` and `shift = 11` are the same |
136 | 2.78M | normalized.method_ = std::min(shift, ANS_LOG_TAB_SIZE - 1) + 1; |
137 | | |
138 | 2.78M | if (!normalized.RebalanceHistogram(histo)) { |
139 | 0 | return JXL_FAILURE("Logic error: couldn't rebalance a histogram"); |
140 | 0 | } |
141 | 2.78M | SizeWriter writer; |
142 | 2.78M | JXL_RETURN_IF_ERROR(normalized.Encode(&writer)); |
143 | 2.78M | normalized.cost_ = writer.size + normalized.EstimateDataBits(histo); |
144 | 2.78M | if (normalized.cost_ < result.cost_) { |
145 | 736k | result = normalized; |
146 | 736k | } |
147 | 2.78M | return true; |
148 | 2.78M | }; |
149 | | |
150 | 686k | switch (ans_histogram_strategy) { |
151 | 11.5k | case HistogramParams::ANSHistogramStrategy::kPrecise: |
152 | 149k | for (uint32_t shift = 0; shift < ANS_LOG_TAB_SIZE; shift++) { |
153 | 138k | JXL_RETURN_IF_ERROR(try_shift(shift)); |
154 | 138k | } |
155 | 11.5k | break; |
156 | 156k | case HistogramParams::ANSHistogramStrategy::kApproximate: |
157 | 1.25M | for (uint32_t shift = 0; shift <= ANS_LOG_TAB_SIZE; shift += 2) { |
158 | 1.09M | JXL_RETURN_IF_ERROR(try_shift(shift)); |
159 | 1.09M | } |
160 | 156k | break; |
161 | 518k | case HistogramParams::ANSHistogramStrategy::kFast: |
162 | 518k | JXL_RETURN_IF_ERROR(try_shift(0)); |
163 | 518k | JXL_RETURN_IF_ERROR(try_shift(ANS_LOG_TAB_SIZE / 2)); |
164 | 518k | JXL_RETURN_IF_ERROR(try_shift(ANS_LOG_TAB_SIZE)); |
165 | 518k | break; |
166 | 686k | } |
167 | | |
168 | | // Sanity check |
169 | | #if JXL_IS_DEBUG_BUILD |
170 | | JXL_ENSURE(histo.counts.size() == result.counts_.size()); |
171 | | ANSHistBin total = 0; // Used only in assert. |
172 | | for (size_t i = 0; i < result.alphabet_size_; ++i) { |
173 | | JXL_ENSURE(result.counts_[i] >= 0); |
174 | | // For non-flat histogram values should be zero or non-zero simultaneously |
175 | | // for the same symbol in both initial and normalized histograms. |
176 | | JXL_ENSURE(result.method_ == 0 || |
177 | | (histo.counts[i] > 0) == (result.counts_[i] > 0)); |
178 | | // Check accuracy of the histogram values |
179 | | if (result.method_ > 0 && result.counts_[i] > 0 && |
180 | | i != result.omit_pos_) { |
181 | | int logcounts = FloorLog2Nonzero<uint32_t>(result.counts_[i]); |
182 | | int bitcount = |
183 | | GetPopulationCountPrecision(logcounts, result.method_ - 1); |
184 | | int drop_bits = logcounts - bitcount; |
185 | | // Check that the value is divisible by 2^drop_bits |
186 | | JXL_ENSURE((result.counts_[i] & ((1 << drop_bits) - 1)) == 0); |
187 | | } |
188 | | total += result.counts_[i]; |
189 | | } |
190 | | for (size_t i = result.alphabet_size_; i < result.counts_.size(); ++i) { |
191 | | JXL_ENSURE(histo.counts[i] == 0); |
192 | | JXL_ENSURE(result.counts_[i] == 0); |
193 | | } |
194 | | JXL_ENSURE((histo.total_count == 0) || (total == ANS_TAB_SIZE)); |
195 | | #endif |
196 | 686k | return result; |
197 | 686k | } |
198 | | |
199 | | template <typename Writer> |
200 | 3.73M | Status Encode(Writer* writer) { |
201 | | // The check ensures also that all RLE sequences can be |
202 | | // encoded by `StoreVarLenUint8` |
203 | 3.73M | JXL_ENSURE(alphabet_size_ <= ANS_MAX_ALPHABET_SIZE); |
204 | | |
205 | | /// Flat histogram. |
206 | 3.73M | if (method_ == 0) { |
207 | | // Mark non-small tree. |
208 | 733k | writer->Write(1, 0); |
209 | | // Mark uniform histogram. |
210 | 733k | writer->Write(1, 1); |
211 | 733k | JXL_ENSURE(alphabet_size_ > 0); |
212 | | // Encode alphabet size. |
213 | 733k | StoreVarLenUint8(alphabet_size_ - 1, writer); |
214 | | |
215 | 733k | return true; |
216 | 733k | } |
217 | | |
218 | | /// Small tree. |
219 | 2.99M | if (num_symbols_ <= kMaxNumSymbolsForSmallCode) { |
220 | | // Small tree marker to encode 1-2 symbols. |
221 | 138k | writer->Write(1, 1); |
222 | 138k | if (num_symbols_ == 0) { |
223 | 0 | writer->Write(1, 0); |
224 | 0 | StoreVarLenUint8(0, writer); |
225 | 138k | } else { |
226 | 138k | writer->Write(1, num_symbols_ - 1); |
227 | 369k | for (size_t i = 0; i < num_symbols_; ++i) { |
228 | 230k | StoreVarLenUint8(symbols_[i], writer); |
229 | 230k | } |
230 | 138k | } |
231 | 138k | if (num_symbols_ == 2) { |
232 | 92.1k | writer->Write(ANS_LOG_TAB_SIZE, counts_[symbols_[0]]); |
233 | 92.1k | } |
234 | | |
235 | 138k | return true; |
236 | 138k | } |
237 | | |
238 | | /// General tree. |
239 | | // Mark non-small tree. |
240 | 2.85M | writer->Write(1, 0); |
241 | | // Mark non-flat histogram. |
242 | 2.85M | writer->Write(1, 0); |
243 | | |
244 | | // Elias gamma-like code for `shift = method - 1`. Only difference is that |
245 | | // if the number of bits to be encoded is equal to `upper_bound_log`, |
246 | | // we skip the terminating 0 in unary coding. |
247 | 2.85M | int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1); |
248 | 2.85M | int log = FloorLog2Nonzero(method_); |
249 | 2.85M | writer->Write(log, (1 << log) - 1); |
250 | 2.85M | if (log != upper_bound_log) writer->Write(1, 0); |
251 | 2.85M | writer->Write(log, ((1 << log) - 1) & method_); |
252 | | |
253 | | // Since `num_symbols_ >= 3`, we know that `alphabet_size_ >= 3`, therefore |
254 | | // we encode `alphabet_size_ - 3`. |
255 | 2.85M | StoreVarLenUint8(alphabet_size_ - 3, writer); |
256 | | |
257 | | // Precompute sequences for RLE encoding. Contains the number of identical |
258 | | // values starting at a given index. Only contains that value at the first |
259 | | // element of the series. |
260 | 2.85M | uint8_t same[ANS_MAX_ALPHABET_SIZE] = {}; |
261 | 2.85M | size_t last = 0; |
262 | 110M | for (size_t i = 1; i <= alphabet_size_; i++) { |
263 | | // Store the sequence length once different symbol reached, or we are |
264 | | // near the omit_pos_, or we're at the end. We don't support including the |
265 | | // omit_pos_ in an RLE sequence because this value may use a different |
266 | | // amount of log2 bits than standard, it is too complex to handle in the |
267 | | // decoder. |
268 | 107M | if (i == alphabet_size_ || i == omit_pos_ || i == omit_pos_ + 1 || |
269 | 99.8M | counts_[i] != counts_[last]) { |
270 | 57.4M | same[last] = i - last; |
271 | 57.4M | last = i; |
272 | 57.4M | } |
273 | 107M | } |
274 | | |
275 | 2.85M | uint8_t bit_width[ANS_MAX_ALPHABET_SIZE] = {}; |
276 | | // Use shortest possible Huffman code to encode `omit_pos` (see |
277 | | // `kBitWidthLengths`). `bit_width` value at `omit_pos` should be the |
278 | | // first of maximal values in the whole `bit_width` array, so it can be |
279 | | // increased without changing that property |
280 | 2.85M | int omit_width = 10; |
281 | 110M | for (size_t i = 0; i < alphabet_size_; ++i) { |
282 | 107M | if (i != omit_pos_ && counts_[i] > 0) { |
283 | 47.6M | bit_width[i] = FloorLog2Nonzero<uint32_t>(counts_[i]) + 1; |
284 | 47.6M | omit_width = std::max(omit_width, bit_width[i] + int{i < omit_pos_}); |
285 | 47.6M | } |
286 | 107M | } |
287 | 2.85M | bit_width[omit_pos_] = static_cast<uint8_t>(omit_width); |
288 | | |
289 | | // The bit widths are encoded with a static Huffman code. |
290 | | // The last symbol is used as RLE sequence. |
291 | 2.85M | constexpr uint8_t kBitWidthLengths[ANS_LOG_TAB_SIZE + 2] = { |
292 | 2.85M | 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 6, 7, 7, |
293 | 2.85M | }; |
294 | 2.85M | constexpr uint8_t kBitWidthSymbols[ANS_LOG_TAB_SIZE + 2] = { |
295 | 2.85M | 17, 11, 15, 3, 9, 7, 4, 2, 5, 6, 0, 33, 1, 65, |
296 | 2.85M | }; |
297 | 2.85M | constexpr uint8_t kMinReps = 5; |
298 | 2.85M | constexpr size_t rep = ANS_LOG_TAB_SIZE + 1; |
299 | | // Encode count bit widths |
300 | 71.4M | for (size_t i = 0; i < alphabet_size_; ++i) { |
301 | 68.5M | writer->Write(kBitWidthLengths[bit_width[i]], |
302 | 68.5M | kBitWidthSymbols[bit_width[i]]); |
303 | 68.5M | if (same[i] >= kMinReps) { |
304 | | // Encode the RLE symbol and skip the repeated ones. |
305 | 2.07M | writer->Write(kBitWidthLengths[rep], kBitWidthSymbols[rep]); |
306 | 2.07M | StoreVarLenUint8(same[i] - kMinReps, writer); |
307 | 2.07M | i += same[i] - 1; |
308 | 2.07M | } |
309 | 68.5M | } |
310 | | // Encode additional bits of accuracy |
311 | 2.85M | uint32_t shift = method_ - 1; |
312 | 2.85M | if (shift != 0) { // otherwise `bitcount = 0` |
313 | 52.4M | for (size_t i = 0; i < alphabet_size_; ++i) { |
314 | 50.3M | if (bit_width[i] > 1 && i != omit_pos_) { |
315 | 34.2M | int bitcount = GetPopulationCountPrecision(bit_width[i] - 1, shift); |
316 | 34.2M | int drop_bits = bit_width[i] - 1 - bitcount; |
317 | 34.2M | JXL_DASSERT((counts_[i] & ((1 << drop_bits) - 1)) == 0); |
318 | 34.2M | writer->Write(bitcount, (counts_[i] >> drop_bits) - (1 << bitcount)); |
319 | 34.2M | } |
320 | 50.3M | if (same[i] >= kMinReps) { |
321 | | // Skip symbols encoded by RLE. |
322 | 1.46M | i += same[i] - 1; |
323 | 1.46M | } |
324 | 50.3M | } |
325 | 2.09M | } |
326 | 2.85M | return true; |
327 | 2.99M | } enc_ans.cc:jxl::Status jxl::(anonymous namespace)::ANSEncodingHistogram::Encode<jxl::SizeWriter>(jxl::SizeWriter*) Line | Count | Source | 200 | 3.56M | Status Encode(Writer* writer) { | 201 | | // The check ensures also that all RLE sequences can be | 202 | | // encoded by `StoreVarLenUint8` | 203 | 3.56M | JXL_ENSURE(alphabet_size_ <= ANS_MAX_ALPHABET_SIZE); | 204 | | | 205 | | /// Flat histogram. | 206 | 3.56M | if (method_ == 0) { | 207 | | // Mark non-small tree. | 208 | 729k | writer->Write(1, 0); | 209 | | // Mark uniform histogram. | 210 | 729k | writer->Write(1, 1); | 211 | 729k | JXL_ENSURE(alphabet_size_ > 0); | 212 | | // Encode alphabet size. | 213 | 729k | StoreVarLenUint8(alphabet_size_ - 1, writer); | 214 | | | 215 | 729k | return true; | 216 | 729k | } | 217 | | | 218 | | /// Small tree. | 219 | 2.83M | if (num_symbols_ <= kMaxNumSymbolsForSmallCode) { | 220 | | // Small tree marker to encode 1-2 symbols. | 221 | 132k | writer->Write(1, 1); | 222 | 132k | if (num_symbols_ == 0) { | 223 | 0 | writer->Write(1, 0); | 224 | 0 | StoreVarLenUint8(0, writer); | 225 | 132k | } else { | 226 | 132k | writer->Write(1, num_symbols_ - 1); | 227 | 354k | for (size_t i = 0; i < num_symbols_; ++i) { | 228 | 222k | StoreVarLenUint8(symbols_[i], writer); | 229 | 222k | } | 230 | 132k | } | 231 | 132k | if (num_symbols_ == 2) { | 232 | 89.5k | writer->Write(ANS_LOG_TAB_SIZE, counts_[symbols_[0]]); | 233 | 89.5k | } | 234 | | | 235 | 132k | return true; | 236 | 132k | } | 237 | | | 238 | | /// General tree. | 239 | | // Mark non-small tree. | 240 | 2.70M | writer->Write(1, 0); | 241 | | // Mark non-flat histogram. | 242 | 2.70M | writer->Write(1, 0); | 243 | | | 244 | | // Elias gamma-like code for `shift = method - 1`. Only difference is that | 245 | | // if the number of bits to be encoded is equal to `upper_bound_log`, | 246 | | // we skip the terminating 0 in unary coding. 
| 247 | 2.70M | int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1); | 248 | 2.70M | int log = FloorLog2Nonzero(method_); | 249 | 2.70M | writer->Write(log, (1 << log) - 1); | 250 | 2.70M | if (log != upper_bound_log) writer->Write(1, 0); | 251 | 2.70M | writer->Write(log, ((1 << log) - 1) & method_); | 252 | | | 253 | | // Since `num_symbols_ >= 3`, we know that `alphabet_size_ >= 3`, therefore | 254 | | // we encode `alphabet_size_ - 3`. | 255 | 2.70M | StoreVarLenUint8(alphabet_size_ - 3, writer); | 256 | | | 257 | | // Precompute sequences for RLE encoding. Contains the number of identical | 258 | | // values starting at a given index. Only contains that value at the first | 259 | | // element of the series. | 260 | 2.70M | uint8_t same[ANS_MAX_ALPHABET_SIZE] = {}; | 261 | 2.70M | size_t last = 0; | 262 | 104M | for (size_t i = 1; i <= alphabet_size_; i++) { | 263 | | // Store the sequence length once different symbol reached, or we are | 264 | | // near the omit_pos_, or we're at the end. We don't support including the | 265 | | // omit_pos_ in an RLE sequence because this value may use a different | 266 | | // amount of log2 bits than standard, it is too complex to handle in the | 267 | | // decoder. | 268 | 102M | if (i == alphabet_size_ || i == omit_pos_ || i == omit_pos_ + 1 || | 269 | 95.1M | counts_[i] != counts_[last]) { | 270 | 54.4M | same[last] = i - last; | 271 | 54.4M | last = i; | 272 | 54.4M | } | 273 | 102M | } | 274 | | | 275 | 2.70M | uint8_t bit_width[ANS_MAX_ALPHABET_SIZE] = {}; | 276 | | // Use shortest possible Huffman code to encode `omit_pos` (see | 277 | | // `kBitWidthLengths`). 
`bit_width` value at `omit_pos` should be the | 278 | | // first of maximal values in the whole `bit_width` array, so it can be | 279 | | // increased without changing that property | 280 | 2.70M | int omit_width = 10; | 281 | 104M | for (size_t i = 0; i < alphabet_size_; ++i) { | 282 | 102M | if (i != omit_pos_ && counts_[i] > 0) { | 283 | 45.0M | bit_width[i] = FloorLog2Nonzero<uint32_t>(counts_[i]) + 1; | 284 | 45.0M | omit_width = std::max(omit_width, bit_width[i] + int{i < omit_pos_}); | 285 | 45.0M | } | 286 | 102M | } | 287 | 2.70M | bit_width[omit_pos_] = static_cast<uint8_t>(omit_width); | 288 | | | 289 | | // The bit widths are encoded with a static Huffman code. | 290 | | // The last symbol is used as RLE sequence. | 291 | 2.70M | constexpr uint8_t kBitWidthLengths[ANS_LOG_TAB_SIZE + 2] = { | 292 | 2.70M | 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 6, 7, 7, | 293 | 2.70M | }; | 294 | 2.70M | constexpr uint8_t kBitWidthSymbols[ANS_LOG_TAB_SIZE + 2] = { | 295 | 2.70M | 17, 11, 15, 3, 9, 7, 4, 2, 5, 6, 0, 33, 1, 65, | 296 | 2.70M | }; | 297 | 2.70M | constexpr uint8_t kMinReps = 5; | 298 | 2.70M | constexpr size_t rep = ANS_LOG_TAB_SIZE + 1; | 299 | | // Encode count bit widths | 300 | 67.6M | for (size_t i = 0; i < alphabet_size_; ++i) { | 301 | 64.9M | writer->Write(kBitWidthLengths[bit_width[i]], | 302 | 64.9M | kBitWidthSymbols[bit_width[i]]); | 303 | 64.9M | if (same[i] >= kMinReps) { | 304 | | // Encode the RLE symbol and skip the repeated ones. 
| 305 | 1.98M | writer->Write(kBitWidthLengths[rep], kBitWidthSymbols[rep]); | 306 | 1.98M | StoreVarLenUint8(same[i] - kMinReps, writer); | 307 | 1.98M | i += same[i] - 1; | 308 | 1.98M | } | 309 | 64.9M | } | 310 | | // Encode additional bits of accuracy | 311 | 2.70M | uint32_t shift = method_ - 1; | 312 | 2.70M | if (shift != 0) { // otherwise `bitcount = 0` | 313 | 51.0M | for (size_t i = 0; i < alphabet_size_; ++i) { | 314 | 49.0M | if (bit_width[i] > 1 && i != omit_pos_) { | 315 | 33.3M | int bitcount = GetPopulationCountPrecision(bit_width[i] - 1, shift); | 316 | 33.3M | int drop_bits = bit_width[i] - 1 - bitcount; | 317 | 33.3M | JXL_DASSERT((counts_[i] & ((1 << drop_bits) - 1)) == 0); | 318 | 33.3M | writer->Write(bitcount, (counts_[i] >> drop_bits) - (1 << bitcount)); | 319 | 33.3M | } | 320 | 49.0M | if (same[i] >= kMinReps) { | 321 | | // Skip symbols encoded by RLE. | 322 | 1.43M | i += same[i] - 1; | 323 | 1.43M | } | 324 | 49.0M | } | 325 | 2.03M | } | 326 | 2.70M | return true; | 327 | 2.83M | } |
enc_ans.cc:jxl::Status jxl::(anonymous namespace)::ANSEncodingHistogram::Encode<jxl::BitWriter>(jxl::BitWriter*) Line | Count | Source | 200 | 167k | Status Encode(Writer* writer) { | 201 | | // The check ensures also that all RLE sequences can be | 202 | | // encoded by `StoreVarLenUint8` | 203 | 167k | JXL_ENSURE(alphabet_size_ <= ANS_MAX_ALPHABET_SIZE); | 204 | | | 205 | | /// Flat histogram. | 206 | 167k | if (method_ == 0) { | 207 | | // Mark non-small tree. | 208 | 3.99k | writer->Write(1, 0); | 209 | | // Mark uniform histogram. | 210 | 3.99k | writer->Write(1, 1); | 211 | 3.99k | JXL_ENSURE(alphabet_size_ > 0); | 212 | | // Encode alphabet size. | 213 | 3.99k | StoreVarLenUint8(alphabet_size_ - 1, writer); | 214 | | | 215 | 3.99k | return true; | 216 | 3.99k | } | 217 | | | 218 | | /// Small tree. | 219 | 163k | if (num_symbols_ <= kMaxNumSymbolsForSmallCode) { | 220 | | // Small tree marker to encode 1-2 symbols. | 221 | 5.96k | writer->Write(1, 1); | 222 | 5.96k | if (num_symbols_ == 0) { | 223 | 0 | writer->Write(1, 0); | 224 | 0 | StoreVarLenUint8(0, writer); | 225 | 5.96k | } else { | 226 | 5.96k | writer->Write(1, num_symbols_ - 1); | 227 | 14.4k | for (size_t i = 0; i < num_symbols_; ++i) { | 228 | 8.51k | StoreVarLenUint8(symbols_[i], writer); | 229 | 8.51k | } | 230 | 5.96k | } | 231 | 5.96k | if (num_symbols_ == 2) { | 232 | 2.55k | writer->Write(ANS_LOG_TAB_SIZE, counts_[symbols_[0]]); | 233 | 2.55k | } | 234 | | | 235 | 5.96k | return true; | 236 | 5.96k | } | 237 | | | 238 | | /// General tree. | 239 | | // Mark non-small tree. | 240 | 157k | writer->Write(1, 0); | 241 | | // Mark non-flat histogram. | 242 | 157k | writer->Write(1, 0); | 243 | | | 244 | | // Elias gamma-like code for `shift = method - 1`. Only difference is that | 245 | | // if the number of bits to be encoded is equal to `upper_bound_log`, | 246 | | // we skip the terminating 0 in unary coding. 
| 247 | 157k | int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1); | 248 | 157k | int log = FloorLog2Nonzero(method_); | 249 | 157k | writer->Write(log, (1 << log) - 1); | 250 | 157k | if (log != upper_bound_log) writer->Write(1, 0); | 251 | 157k | writer->Write(log, ((1 << log) - 1) & method_); | 252 | | | 253 | | // Since `num_symbols_ >= 3`, we know that `alphabet_size_ >= 3`, therefore | 254 | | // we encode `alphabet_size_ - 3`. | 255 | 157k | StoreVarLenUint8(alphabet_size_ - 3, writer); | 256 | | | 257 | | // Precompute sequences for RLE encoding. Contains the number of identical | 258 | | // values starting at a given index. Only contains that value at the first | 259 | | // element of the series. | 260 | 157k | uint8_t same[ANS_MAX_ALPHABET_SIZE] = {}; | 261 | 157k | size_t last = 0; | 262 | 5.27M | for (size_t i = 1; i <= alphabet_size_; i++) { | 263 | | // Store the sequence length once different symbol reached, or we are | 264 | | // near the omit_pos_, or we're at the end. We don't support including the | 265 | | // omit_pos_ in an RLE sequence because this value may use a different | 266 | | // amount of log2 bits than standard, it is too complex to handle in the | 267 | | // decoder. | 268 | 5.11M | if (i == alphabet_size_ || i == omit_pos_ || i == omit_pos_ + 1 || | 269 | 4.72M | counts_[i] != counts_[last]) { | 270 | 2.97M | same[last] = i - last; | 271 | 2.97M | last = i; | 272 | 2.97M | } | 273 | 5.11M | } | 274 | | | 275 | 157k | uint8_t bit_width[ANS_MAX_ALPHABET_SIZE] = {}; | 276 | | // Use shortest possible Huffman code to encode `omit_pos` (see | 277 | | // `kBitWidthLengths`). 
`bit_width` value at `omit_pos` should be the | 278 | | // first of maximal values in the whole `bit_width` array, so it can be | 279 | | // increased without changing that property | 280 | 157k | int omit_width = 10; | 281 | 5.27M | for (size_t i = 0; i < alphabet_size_; ++i) { | 282 | 5.11M | if (i != omit_pos_ && counts_[i] > 0) { | 283 | 2.59M | bit_width[i] = FloorLog2Nonzero<uint32_t>(counts_[i]) + 1; | 284 | 2.59M | omit_width = std::max(omit_width, bit_width[i] + int{i < omit_pos_}); | 285 | 2.59M | } | 286 | 5.11M | } | 287 | 157k | bit_width[omit_pos_] = static_cast<uint8_t>(omit_width); | 288 | | | 289 | | // The bit widths are encoded with a static Huffman code. | 290 | | // The last symbol is used as RLE sequence. | 291 | 157k | constexpr uint8_t kBitWidthLengths[ANS_LOG_TAB_SIZE + 2] = { | 292 | 157k | 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 6, 7, 7, | 293 | 157k | }; | 294 | 157k | constexpr uint8_t kBitWidthSymbols[ANS_LOG_TAB_SIZE + 2] = { | 295 | 157k | 17, 11, 15, 3, 9, 7, 4, 2, 5, 6, 0, 33, 1, 65, | 296 | 157k | }; | 297 | 157k | constexpr uint8_t kMinReps = 5; | 298 | 157k | constexpr size_t rep = ANS_LOG_TAB_SIZE + 1; | 299 | | // Encode count bit widths | 300 | 3.78M | for (size_t i = 0; i < alphabet_size_; ++i) { | 301 | 3.63M | writer->Write(kBitWidthLengths[bit_width[i]], | 302 | 3.63M | kBitWidthSymbols[bit_width[i]]); | 303 | 3.63M | if (same[i] >= kMinReps) { | 304 | | // Encode the RLE symbol and skip the repeated ones. 
| 305 | 89.9k | writer->Write(kBitWidthLengths[rep], kBitWidthSymbols[rep]); | 306 | 89.9k | StoreVarLenUint8(same[i] - kMinReps, writer); | 307 | 89.9k | i += same[i] - 1; | 308 | 89.9k | } | 309 | 3.63M | } | 310 | | // Encode additional bits of accuracy | 311 | 157k | uint32_t shift = method_ - 1; | 312 | 157k | if (shift != 0) { // otherwise `bitcount = 0` | 313 | 1.38M | for (size_t i = 0; i < alphabet_size_; ++i) { | 314 | 1.32M | if (bit_width[i] > 1 && i != omit_pos_) { | 315 | 909k | int bitcount = GetPopulationCountPrecision(bit_width[i] - 1, shift); | 316 | 909k | int drop_bits = bit_width[i] - 1 - bitcount; | 317 | 909k | JXL_DASSERT((counts_[i] & ((1 << drop_bits) - 1)) == 0); | 318 | 909k | writer->Write(bitcount, (counts_[i] >> drop_bits) - (1 << bitcount)); | 319 | 909k | } | 320 | 1.32M | if (same[i] >= kMinReps) { | 321 | | // Skip symbols encoded by RLE. | 322 | 33.0k | i += same[i] - 1; | 323 | 33.0k | } | 324 | 1.32M | } | 325 | 57.4k | } | 326 | 157k | return true; | 327 | 163k | } |
|
328 | | |
329 | | void ANSBuildInfoTable(const AliasTable::Entry* table, size_t log_alpha_size, |
330 | 172k | ANSEncSymbolInfo* info) { |
331 | | // Create valid alias table for empty streams |
332 | 5.86M | for (size_t s = 0; s < std::max(size_t{1}, alphabet_size_); ++s) { |
333 | 5.69M | const ANSHistBin freq = s == alphabet_size_ ? ANS_TAB_SIZE : counts_[s]; |
334 | 5.69M | info[s].freq_ = static_cast<uint16_t>(freq); |
335 | 5.69M | #ifdef USE_MULT_BY_RECIPROCAL |
336 | 5.69M | if (freq != 0) { |
337 | 2.97M | info[s].ifreq_ = ((1ull << RECIPROCAL_PRECISION) + info[s].freq_ - 1) / |
338 | 2.97M | info[s].freq_; |
339 | 2.97M | } else { |
340 | 2.71M | info[s].ifreq_ = |
341 | 2.71M | 1; // Shouldn't matter (symbol shouldn't occur), but... |
342 | 2.71M | } |
343 | 5.69M | #endif |
344 | 5.69M | info[s].reverse_map_.resize(freq); |
345 | 5.69M | } |
346 | 172k | size_t log_entry_size = ANS_LOG_TAB_SIZE - log_alpha_size; |
347 | 172k | size_t entry_size_minus_1 = (1 << log_entry_size) - 1; |
348 | 707M | for (int i = 0; i < ANS_TAB_SIZE; i++) { |
349 | 707M | AliasTable::Symbol s = |
350 | 707M | AliasTable::Lookup(table, i, log_entry_size, entry_size_minus_1); |
351 | 707M | info[s.value].reverse_map_[s.offset] = i; |
352 | 707M | } |
353 | 172k | } |
354 | | |
355 | | private: |
// Default constructor with an empty body. It sits in the private section:
// instances are created inside the class itself (`ComputeBest` does so and
// then fills in the members).
356 | 729k | ANSEncodingHistogram() {} |
357 | | |
358 | | // Fixed-point log2 LUT for values of [0,4096] |
// The table has ANS_TAB_SIZE + 1 entries and is indexed by a count.
// `EstimateDataBits` and `EstimateDataBitsFlat` scale sums of its entries by
// 2^-31 (via `ldexpf`), which suggests 31 fractional bits per entry.
359 | | using Lg2LUT = std::array<uint32_t, ANS_TAB_SIZE + 1>; |
// Declaration only: the definition of `lg2` is not in this part of the file.
360 | | static const Lg2LUT lg2; |
361 | | |
362 | 2.78M | float EstimateDataBits(const Histogram& histo) { |
363 | 2.78M | int64_t sum = 0; |
364 | 105M | for (size_t i = 0; i < alphabet_size_; ++i) { |
365 | | // += histogram[i] * -log(counts[i]/total_counts) |
366 | 102M | sum += histo.counts[i] * int64_t{lg2[counts_[i]]}; |
367 | 102M | } |
368 | 2.78M | return (histo.total_count - ldexpf(sum, -31)) * ANS_LOG_TAB_SIZE; |
369 | 2.78M | } |
370 | | |
371 | 729k | static float EstimateDataBitsFlat(const Histogram& histo) { |
372 | 729k | size_t len = histo.alphabet_size(); |
373 | 729k | int64_t flat_bits = int64_t{lg2[len]} * ANS_LOG_TAB_SIZE; |
374 | 729k | return ldexpf(histo.total_count * flat_bits, -31); |
375 | 729k | } |
376 | | |
// One entry of the `allowed_counts` tables (see `CountsArray`), which
// `RebalanceHistogram` reads. `count` and `step_log` are 16-bit bit-fields of
// `ANSHistBin`; `delta_lg2` is a full 32-bit value.
377 | | struct CountsEntropy { |
378 | | ANSHistBin count : 16; // allowed value of counts in a histogram bin |
379 | | ANSHistBin step_log : 16; // log2 of increase step size (can use 5 bits) |
380 | | int32_t delta_lg2; // change of log between that value and the next allowed |
381 | | }; |
382 | | |
383 | | // Array is sorted by decreasing allowed counts for each possible shift. |
384 | | // Exclusion of single-bin histograms before `RebalanceHistogram` allows |
385 | | // to put count upper limit of 4095, and shifts of 11 and 12 produce the |
386 | | // same table |
// `CountsArray`: ANS_LOG_TAB_SIZE rows (one per shift) of ANS_TAB_SIZE entries.
387 | | using CountsArray = |
388 | | std::array<std::array<CountsEntropy, ANS_TAB_SIZE>, ANS_LOG_TAB_SIZE>; |
// `CountsIndex`: same shape as `CountsArray`, holding 16-bit values.
// NOTE(review): presumably maps a count to its position in the matching
// `CountsArray` row - confirm at the definition of `allowed_counts`.
389 | | using CountsIndex = |
390 | | std::array<std::array<uint16_t, ANS_TAB_SIZE>, ANS_LOG_TAB_SIZE>; |
// Bundles both tables; `RebalanceHistogram` picks the row for its shift from
// each of them.
391 | | struct AllowedCounts { |
392 | | CountsArray array; |
393 | | CountsIndex index; |
394 | | }; |
// Declaration only: the definition is not in this part of the file.
395 | | static const AllowedCounts allowed_counts; |
396 | | |
397 | | // Returns the difference between largest count that can be represented and is |
398 | | // smaller than "count" and smallest representable count larger than "count". |
399 | 117M | static uint32_t SmallestIncrementLog(uint32_t count, uint32_t shift) { |
400 | 117M | if (count == 0) return 0; |
401 | 62.1M | uint32_t bits = FloorLog2Nonzero(count); |
402 | 62.1M | uint32_t drop_bits = bits - GetPopulationCountPrecision(bits, shift); |
403 | 62.1M | return drop_bits; |
404 | 117M | } |
// Normalizes `histo` into `counts_` so that the counts sum to ANS_TAB_SIZE
// and every count except the balancing one is taken from
// `allowed_counts[shift]`.
//
// We are growing/reducing the histogram step by step trying to maximize total
// entropy, i.e. the sum of `freq[n] * log[counts[n]]`, for a given sum of
// `counts[n]` chosen from `allowed_counts[shift]`. The sum is balanced by
// `counts[omit_pos_]`, the highest bin of the histogram. We start close to
// the correct solution and each time choose the step with the maximum entropy
// increase per unit of bin change. This greedy scheme is not guaranteed to
// reach the global maximum, but cannot produce an invalid histogram. We use a
// fixed-point approximation for logarithms and all arithmetic is integer
// apart from the initial approximation. The sum of `freq` and each of
// `lg2[counts]` are supposed to be limited to the `int32_t` range, so that
// the sum of their products does not exceed `int64_t`.
//
// Returns true if the balancing bin ended up with a positive count.
bool RebalanceHistogram(const Histogram& histo) {
  constexpr ANSHistBin table_size = ANS_TAB_SIZE;
  uint32_t shift = method_ - 1;

  struct EntropyDelta {
    ANSHistBin freq;   // initial count
    size_t count_ind;  // index of current bin value in `allowed_counts`
    size_t bin_ind;    // index of current bin in `counts`
  };
  // Penalties corresponding to different step sizes - entropy decrease in
  // the balancing bin; a step of size (1 << (ANS_LOG_TAB_SIZE - 1)) is not
  // possible.
  std::array<int64_t, ANS_LOG_TAB_SIZE - 1> balance_inc = {};
  std::array<int64_t, ANS_LOG_TAB_SIZE - 1> balance_dec = {};
  const auto& ac = allowed_counts.array[shift];
  const auto& ai = allowed_counts.index[shift];
  // TODO(ivan) separate cases of shift >= 11 - all steps are 1 there, and
  //            possibly 10 - all relevant steps are 2.
  // Total entropy change by a step: increase/decrease in the current bin
  // together with the corresponding decrease/increase in the balancing bin.
  // Inc steps increase the current bin, dec steps decrease it.
  const auto delta_entropy_inc = [&](const EntropyDelta& a) {
    return a.freq * int64_t{ac[a.count_ind].delta_lg2} -
           balance_inc[ac[a.count_ind].step_log];
  };
  const auto delta_entropy_dec = [&](const EntropyDelta& a) {
    return a.freq * int64_t{ac[a.count_ind + 1].delta_lg2} -
           balance_dec[ac[a.count_ind + 1].step_log];
  };
  // Compare steps by entropy increase per unit of histogram bin change.
  // Truncation is OK here, accuracy is anyway better than float.
  const auto IncLess = [&](const EntropyDelta& a, const EntropyDelta& b) {
    return delta_entropy_inc(a) >> ac[a.count_ind].step_log <
           delta_entropy_inc(b) >> ac[b.count_ind].step_log;
  };
  const auto DecLess = [&](const EntropyDelta& a, const EntropyDelta& b) {
    return delta_entropy_dec(a) >> ac[a.count_ind + 1].step_log <
           delta_entropy_dec(b) >> ac[b.count_ind + 1].step_log;
  };
  // Vector of adjustable bins from `allowed_counts`
  std::vector<EntropyDelta> bins;
  bins.reserve(256);

  // Scale factor from input frequencies to table units.
  double norm = double{table_size} / histo.total_count;

  size_t remainder_pos = 0;  // highest balancing bin in the histogram
  int64_t max_freq = 0;
  ANSHistBin rest = table_size;  // reserve of histogram counts to distribute
  for (size_t n = 0; n < alphabet_size_; ++n) {
    ANSHistBin freq = histo.counts[n];
    // Strict comparison: the first bin with the maximal frequency wins.
    if (freq > max_freq) {
      remainder_pos = n;
      max_freq = freq;
    }

    double target = freq * norm;  // rounding
    // Keep zeros and clamp nonzero freq counts to [1, table_size)
    ANSHistBin count = std::max<ANSHistBin>(round(target), freq > 0);
    count = std::min<ANSHistBin>(count, table_size - 1);
    // Round down to the nearest allowed count for this shift.
    uint32_t step_log = SmallestIncrementLog(count, shift);
    ANSHistBin inc = 1 << step_log;
    count &= ~(inc - 1);

    counts_[n] = count;
    rest -= count;
    // Only bins with a target above 1 take part in the adjustment below.
    if (target > 1.0) {
      bins.push_back({freq, ai[count], n});
    }
  }

  // Delete the highest balancing bin from those adjustable by
  // `allowed_counts`.
  // NOTE(review): this assumes the most frequent bin had `target > 1.0` and is
  // therefore present in `bins`; otherwise `find_if` would return `end()`.
  // Confirm that callers never pass an empty histogram.
  bins.erase(std::find_if(
      bins.begin(), bins.end(),
      [&](const EntropyDelta& a) { return a.bin_ind == remainder_pos; }));
  // From now on `rest` is the height of the balancing bin; here it can be
  // negative, but it will be pulled into the positive domain later.
  rest += counts_[remainder_pos];

  if (!bins.empty()) {
    // Largest step log that occurs in the table for this shift.
    const uint32_t max_log = ac[1].step_log;
    while (true) {
      // Update balancing bin penalties, setting guards and tractors
      for (uint32_t log = 0; log <= max_log; ++log) {
        ANSHistBin delta = 1 << log;
        if (rest >= table_size) {
          // Pull a too large `rest` into the allowed domain:
          balance_inc[log] = 0;  // permit all inc steps
          balance_dec[log] = 0;  // forbid all dec steps
        } else if (rest > 1) {
          // `rest` is OK, put guards against impossible steps
          balance_inc[log] =
              rest > delta  // possible step
                  ? max_freq * int64_t{lg2[rest] - lg2[rest - delta]}
                  : std::numeric_limits<int64_t>::max();  // forbidden
          balance_dec[log] =
              rest + delta < table_size  // possible step
                  ? max_freq * int64_t{lg2[rest + delta] - lg2[rest]}
                  : 0;  // forbidden
        } else {
          // Pull a negative or too small `rest` (<= 1) into the positive
          // domain:
          // forbid all inc steps
          balance_inc[log] = std::numeric_limits<int64_t>::max();
          // permit all dec steps
          balance_dec[log] = std::numeric_limits<int64_t>::max();
        }
      }
      // Try to increase entropy
      auto best_bin_inc = std::max_element(bins.begin(), bins.end(), IncLess);
      if (delta_entropy_inc(*best_bin_inc) > 0) {
        // Grow the bin with the best histogram entropy increase; moving to a
        // lower slot index selects a larger allowed count.
        rest -= 1 << ac[best_bin_inc->count_ind--].step_log;
      } else {
        // This still implies that entropy is strictly increasing each step
        // (or `rest` is pulled into the positive domain), so we cannot loop
        // infinitely
        auto best_bin_dec =
            std::min_element(bins.begin(), bins.end(), DecLess);
        // Break if no reverse step can grow entropy (or is valid)
        if (delta_entropy_dec(*best_bin_dec) >= 0) break;
        // Decrease the bin with the best histogram entropy increase
        rest += 1 << ac[++best_bin_dec->count_ind].step_log;
      }
    }
    // Set counts besides the balancing bin
    for (auto& a : bins) counts_[a.bin_ind] = ac[a.count_ind].count;

    // The scheme works fine if we have room to grow the `bit_width` of the
    // balancing bin, otherwise we need to move the balancing bin to the first
    // bin of 12 bit width. In this case both that bin and the balancing one
    // should be close to 2048 in targets, so exchanging them will not produce
    // a much worse histogram.
    for (size_t n = 0; n < remainder_pos; ++n) {
      if (counts_[n] >= 2048) {
        counts_[remainder_pos] = counts_[n];
        remainder_pos = n;
        break;
      }
    }
  }
  // Set balancing bin
  counts_[remainder_pos] = rest;
  omit_pos_ = remainder_pos;

  return counts_[remainder_pos] > 0;
}
560 | | |
561 | | float cost_ = 0; |
562 | | uint32_t method_ = 0; |
563 | | size_t omit_pos_ = 0; |
564 | | size_t alphabet_size_ = 0; |
565 | | size_t num_symbols_ = 0; |
566 | | size_t symbols_[kMaxNumSymbolsForSmallCode] = {}; |
567 | | std::vector<ANSHistBin> counts_{}; |
568 | | }; |
569 | | |
570 | | using AEH = ANSEncodingHistogram; |
571 | | |
572 | 290 | const AEH::Lg2LUT AEH::lg2 = [] { |
573 | 290 | Lg2LUT lg2; |
574 | 290 | lg2[0] = 0; // for entropy calculations it is OK |
575 | 1.18M | for (size_t i = 1; i < lg2.size(); ++i) { |
576 | 1.18M | lg2[i] = round(ldexp(log2(i) / ANS_LOG_TAB_SIZE, 31)); |
577 | 1.18M | } |
578 | 290 | return lg2; |
579 | 290 | }(); |
580 | | |
581 | 290 | const AEH::AllowedCounts AEH::allowed_counts = [] { |
582 | 290 | AllowedCounts result; |
583 | | |
584 | 3.77k | for (uint32_t shift = 0; shift < result.array.size(); ++shift) { |
585 | 3.48k | auto& ac = result.array[shift]; |
586 | 3.48k | auto& ai = result.index[shift]; |
587 | 3.48k | ANSHistBin last = ~0; |
588 | 3.48k | size_t slot = 0; |
589 | | // TODO(eustas): are those "default" values relevant? |
590 | 3.48k | ac[0].delta_lg2 = 0; |
591 | 3.48k | ac[0].step_log = 0; |
592 | 14.2M | for (int32_t i = ac.size() - 1; i >= 0; --i) { |
593 | 14.2M | int32_t curr = i & ~((1 << SmallestIncrementLog(i, shift)) - 1); |
594 | 14.2M | if (curr == last) continue; |
595 | 2.77M | last = curr; |
596 | 2.77M | ac[slot].count = curr; |
597 | 2.77M | ai[curr] = slot; |
598 | 2.77M | if (curr == 0) { |
599 | | // Guards against non-possible steps: |
600 | | // at max value [0] - 0 (by init), at min value - max |
601 | 3.48k | ac[slot].delta_lg2 = std::numeric_limits<int32_t>::max(); |
602 | 3.48k | ac[slot].step_log = 0; |
603 | 2.77M | } else if (slot > 0) { |
604 | 2.77M | ANSHistBin prev = ac[slot - 1].count; |
605 | 2.77M | ac[slot].delta_lg2 = round(ldexp( |
606 | 2.77M | log2(static_cast<double>(prev) / curr) / ANS_LOG_TAB_SIZE, 31)); |
607 | 2.77M | ac[slot].step_log = FloorLog2Nonzero<uint32_t>(prev - curr); |
608 | 2.77M | prev = curr; |
609 | 2.77M | } |
610 | 2.77M | slot++; |
611 | 2.77M | } |
612 | 3.48k | } |
613 | | |
614 | 290 | return result; |
615 | 290 | }(); |
616 | | |
617 | | } // namespace |
618 | | |
619 | 557k | StatusOr<float> Histogram::ANSPopulationCost() const { |
620 | 557k | if (counts.size() > ANS_MAX_ALPHABET_SIZE) { |
621 | 0 | return std::numeric_limits<float>::max(); |
622 | 0 | } |
623 | 557k | JXL_ASSIGN_OR_RETURN( |
624 | 557k | ANSEncodingHistogram normalized, |
625 | 557k | ANSEncodingHistogram::ComputeBest( |
626 | 557k | *this, HistogramParams::ANSHistogramStrategy::kFast)); |
627 | 557k | return normalized.Cost(); |
628 | 557k | } |
629 | | |
630 | | // Returns an estimate or exact cost of encoding this histogram and the |
631 | | // corresponding data. |
632 | | StatusOr<size_t> EntropyEncodingData::BuildAndStoreANSEncodingData( |
633 | | JxlMemoryManager* memory_manager, |
634 | | HistogramParams::ANSHistogramStrategy ans_histogram_strategy, |
635 | 200k | const Histogram& histogram, BitWriter* writer) { |
636 | 200k | ANSEncSymbolInfo* info = encoding_info.back().data(); |
637 | 200k | size_t size = histogram.alphabet_size(); |
638 | 200k | if (use_prefix_code) { |
639 | 27.5k | size_t cost = 0; |
640 | 27.5k | if (size <= 1) return 0; |
641 | 26.6k | std::vector<uint32_t> histo(size); |
642 | 256k | for (size_t i = 0; i < size; i++) { |
643 | 229k | JXL_ENSURE(histogram.counts[i] >= 0); |
644 | 229k | histo[i] = histogram.counts[i]; |
645 | 229k | } |
646 | 26.6k | std::vector<uint8_t> depths(size); |
647 | 26.6k | std::vector<uint16_t> bits(size); |
648 | 26.6k | if (writer == nullptr) { |
649 | 18.6k | BitWriter tmp_writer{memory_manager}; |
650 | 18.6k | JXL_RETURN_IF_ERROR(tmp_writer.WithMaxBits( |
651 | 18.6k | 8 * size + 8, // safe upper bound |
652 | 18.6k | LayerType::Header, /*aux_out=*/nullptr, [&] { |
653 | 18.6k | return BuildAndStoreHuffmanTree(histo.data(), size, depths.data(), |
654 | 18.6k | bits.data(), &tmp_writer); |
655 | 18.6k | })); |
656 | 18.6k | cost = tmp_writer.BitsWritten(); |
657 | 18.6k | } else { |
658 | 7.99k | size_t start = writer->BitsWritten(); |
659 | 7.99k | JXL_RETURN_IF_ERROR(BuildAndStoreHuffmanTree( |
660 | 7.99k | histo.data(), size, depths.data(), bits.data(), writer)); |
661 | 7.99k | cost = writer->BitsWritten() - start; |
662 | 7.99k | } |
663 | 256k | for (size_t i = 0; i < size; i++) { |
664 | 229k | info[i].bits = depths[i] == 0 ? 0 : bits[i]; |
665 | 229k | info[i].depth = depths[i]; |
666 | 229k | } |
667 | | // Estimate data cost. |
668 | 256k | for (size_t i = 0; i < size; i++) { |
669 | 229k | cost += histo[i] * info[i].depth; |
670 | 229k | } |
671 | 26.6k | return cost; |
672 | 26.6k | } |
673 | 345k | JXL_ASSIGN_OR_RETURN( |
674 | 345k | ANSEncodingHistogram normalized, |
675 | 345k | ANSEncodingHistogram::ComputeBest(histogram, ans_histogram_strategy)); |
676 | | |
677 | | // TODO(eustas): fix: 2KiB on stack |
678 | 345k | AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE]; |
679 | | |
680 | 345k | JXL_RETURN_IF_ERROR( |
681 | 345k | InitAliasTable(normalized.Counts(), ANS_LOG_TAB_SIZE, log_alpha_size, a)); |
682 | 172k | normalized.ANSBuildInfoTable(a, log_alpha_size, info); |
683 | 172k | if (writer != nullptr) { |
684 | | // size_t start = writer->BitsWritten(); |
685 | 167k | JXL_RETURN_IF_ERROR(normalized.Encode(writer)); |
686 | | // return writer->BitsWritten() - start; |
687 | 167k | } |
688 | 172k | return static_cast<size_t>(ceilf(normalized.Cost())); |
689 | 172k | } |
690 | | |
691 | | namespace { |
692 | | |
693 | | Histogram HistogramFromSymbolInfo( |
694 | 0 | const std::vector<ANSEncSymbolInfo>& encoding_info, bool use_prefix_code) { |
695 | 0 | Histogram histo; |
696 | 0 | histo.counts.resize(DivCeil(encoding_info.size(), Histogram::kRounding) * |
697 | 0 | Histogram::kRounding); |
698 | 0 | histo.total_count = 0; |
699 | 0 | for (size_t i = 0; i < encoding_info.size(); ++i) { |
700 | 0 | const ANSEncSymbolInfo& info = encoding_info[i]; |
701 | 0 | int count = use_prefix_code |
702 | 0 | ? (info.depth ? (1u << (PREFIX_MAX_BITS - info.depth)) : 0) |
703 | 0 | : info.freq_; |
704 | 0 | histo.counts[i] = count; |
705 | 0 | histo.total_count += count; |
706 | 0 | } |
707 | 0 | return histo; |
708 | 0 | } |
709 | | |
710 | | } // namespace |
711 | | |
712 | | Status EntropyEncodingData::ChooseUintConfigs( |
713 | | JxlMemoryManager* memory_manager, const HistogramParams& params, |
714 | | const std::vector<std::vector<Token>>& tokens, |
715 | 41.8k | std::vector<Histogram>& clustered_histograms) { |
716 | | // Set sane default `log_alpha_size`. |
717 | 41.8k | if (use_prefix_code) { |
718 | 27.3k | log_alpha_size = PREFIX_MAX_BITS; |
719 | 27.3k | } else if (params.streaming_mode) { |
720 | | // TODO(szabadka) Figure out if we can use lower values here. |
721 | 0 | log_alpha_size = 8; |
722 | 14.5k | } else if (lz77.enabled) { |
723 | 2.53k | log_alpha_size = 8; |
724 | 11.9k | } else { |
725 | 11.9k | log_alpha_size = 7; |
726 | 11.9k | } |
727 | | |
728 | 41.8k | if (ans_fuzzer_friendly_) { |
729 | 0 | uint_config.assign(1, HybridUintConfig(7, 0, 0)); |
730 | 0 | return true; |
731 | 0 | } |
732 | | |
733 | 41.8k | uint_config.assign(clustered_histograms.size(), params.UintConfig()); |
734 | | // If the uint config is fixed, just use it. |
735 | 41.8k | if (params.uint_method != HistogramParams::HybridUintMethod::kBest && |
736 | 39.2k | params.uint_method != HistogramParams::HybridUintMethod::kFast) { |
737 | 31.3k | return true; |
738 | 31.3k | } |
739 | | // Even if the uint config is adaptive, just stick with the default in |
740 | | // streaming mode. |
741 | 10.4k | if (params.streaming_mode) { |
742 | 0 | return true; |
743 | 0 | } |
744 | | |
745 | | // Brute-force method that tries a few options. |
746 | 10.4k | std::vector<HybridUintConfig> configs; |
747 | 10.4k | if (params.uint_method == HistogramParams::HybridUintMethod::kBest) { |
748 | 2.62k | configs = { |
749 | 2.62k | HybridUintConfig(4, 2, 0), // default |
750 | 2.62k | HybridUintConfig(4, 1, 0), // less precise |
751 | 2.62k | HybridUintConfig(4, 2, 1), // add sign |
752 | 2.62k | HybridUintConfig(4, 2, 2), // add sign+parity |
753 | 2.62k | HybridUintConfig(4, 1, 2), // add parity but less msb |
754 | | // Same as above, but more direct coding. |
755 | 2.62k | HybridUintConfig(5, 2, 0), HybridUintConfig(5, 1, 0), |
756 | 2.62k | HybridUintConfig(5, 2, 1), HybridUintConfig(5, 2, 2), |
757 | 2.62k | HybridUintConfig(5, 1, 2), |
758 | | // Same as above, but less direct coding. |
759 | 2.62k | HybridUintConfig(3, 2, 0), HybridUintConfig(3, 1, 0), |
760 | 2.62k | HybridUintConfig(3, 2, 1), HybridUintConfig(3, 1, 2), |
761 | | // For near-lossless. |
762 | 2.62k | HybridUintConfig(4, 1, 3), HybridUintConfig(5, 1, 4), |
763 | 2.62k | HybridUintConfig(5, 2, 3), HybridUintConfig(6, 1, 5), |
764 | 2.62k | HybridUintConfig(6, 2, 4), HybridUintConfig(6, 0, 0), |
765 | | // Other |
766 | 2.62k | HybridUintConfig(0, 0, 0), // varlenuint |
767 | 2.62k | HybridUintConfig(2, 0, 1), // works well for ctx map |
768 | 2.62k | HybridUintConfig(7, 0, 0), // direct coding |
769 | 2.62k | HybridUintConfig(8, 0, 0), // direct coding |
770 | 2.62k | HybridUintConfig(9, 0, 0), // direct coding |
771 | 2.62k | HybridUintConfig(10, 0, 0), // direct coding |
772 | 2.62k | HybridUintConfig(11, 0, 0), // direct coding |
773 | 2.62k | HybridUintConfig(12, 0, 0), // direct coding |
774 | 2.62k | }; |
775 | 7.86k | } else { |
776 | 7.86k | JXL_DASSERT(params.uint_method == HistogramParams::HybridUintMethod::kFast); |
777 | 7.86k | configs = { |
778 | 7.86k | HybridUintConfig(4, 2, 0), // default |
779 | 7.86k | HybridUintConfig(4, 1, 2), // add parity but less msb |
780 | 7.86k | HybridUintConfig(0, 0, 0), // smallest histograms |
781 | 7.86k | HybridUintConfig(2, 0, 1), // works well for ctx map |
782 | 7.86k | }; |
783 | 7.86k | } |
784 | | |
785 | 10.4k | size_t num_histo = clustered_histograms.size(); |
786 | 10.4k | std::vector<uint8_t> is_valid(num_histo); |
787 | 10.4k | std::vector<size_t> histo_volume(2 * num_histo); |
788 | 10.4k | std::vector<size_t> histo_offset(2 * num_histo + 1); |
789 | 10.4k | std::vector<uint32_t> max_value_per_histo(2 * num_histo); |
790 | | |
791 | | // TODO(veluca): do not ignore lz77 commands. |
792 | | |
793 | 97.8k | for (const auto& stream : tokens) { |
794 | 113M | for (const auto& token : stream) { |
795 | 113M | size_t histo = context_map[token.context]; |
796 | 113M | histo_volume[histo + (token.is_lz77_length ? num_histo : 0)]++; |
797 | 113M | } |
798 | 97.8k | } |
799 | 10.4k | size_t max_histo_volume = 0; |
800 | 184k | for (size_t h = 0; h < 2 * num_histo; ++h) { |
801 | 174k | max_histo_volume = std::max(max_histo_volume, histo_volume[h]); |
802 | 174k | histo_offset[h + 1] = histo_offset[h] + histo_volume[h]; |
803 | 174k | } |
804 | | |
805 | 10.4k | const size_t max_vec_size = MaxVectorSize(); |
806 | 10.4k | std::vector<uint32_t> transposed(histo_offset[num_histo * 2] + max_vec_size); |
807 | 10.4k | { |
808 | 10.4k | std::vector<size_t> next_offset = histo_offset; // copy |
809 | 97.8k | for (const auto& stream : tokens) { |
810 | 113M | for (const auto& token : stream) { |
811 | 113M | size_t histo = |
812 | 113M | context_map[token.context] + (token.is_lz77_length ? num_histo : 0); |
813 | 113M | transposed[next_offset[histo]++] = token.value; |
814 | 113M | } |
815 | 97.8k | } |
816 | 10.4k | } |
817 | 184k | for (size_t h = 0; h < 2 * num_histo; ++h) { |
818 | 174k | max_value_per_histo[h] = |
819 | 174k | MaxValue(transposed.data() + histo_offset[h], histo_volume[h]); |
820 | 174k | } |
821 | 10.4k | uint32_t max_lz77 = 0; |
822 | 97.7k | for (size_t h = num_histo; h < 2 * num_histo; ++h) { |
823 | 87.2k | max_lz77 = std::max(max_lz77, MaxValue(transposed.data() + histo_offset[h], |
824 | 87.2k | histo_volume[h])); |
825 | 87.2k | } |
826 | | |
827 | | // Wider histograms are assigned max cost in PopulationCost anyway |
828 | | // and therefore will not be used |
829 | 10.4k | size_t max_alpha = ANS_MAX_ALPHABET_SIZE; |
830 | | |
831 | 10.4k | JXL_ASSIGN_OR_RETURN( |
832 | 10.4k | AlignedMemory tmp, |
833 | 10.4k | AlignedMemory::Create(memory_manager, (max_histo_volume + max_vec_size) * |
834 | 10.4k | sizeof(uint32_t))); |
835 | 97.7k | for (size_t h = 0; h < num_histo; h++) { |
836 | 87.2k | float best_cost = std::numeric_limits<float>::max(); |
837 | 535k | for (HybridUintConfig cfg : configs) { |
838 | 535k | uint32_t max_v = max_value_per_histo[h]; |
839 | 535k | size_t capacity; |
840 | 535k | { |
841 | 535k | uint32_t tok, nbits, bits; |
842 | 535k | cfg.Encode(max_v, &tok, &nbits, &bits); |
843 | 535k | tok |= cfg.LsbMask(); |
844 | 535k | if (tok >= max_alpha || (lz77.enabled && tok >= lz77.min_symbol)) { |
845 | 12.6k | continue; // Not valid config for this context |
846 | 12.6k | } |
847 | 522k | capacity = tok + 1; |
848 | 522k | } |
849 | | |
850 | 0 | Histogram histo; |
851 | 522k | histo.EnsureCapacity(capacity); |
852 | 522k | size_t len = histo_volume[h]; |
853 | 522k | uint32_t* data = transposed.data() + histo_offset[h]; |
854 | 522k | size_t extra_bits = EstimateTokenCost(data, len, cfg, tmp); |
855 | 522k | uint32_t* tmp_tokens = tmp.address<uint32_t>(); |
856 | 586M | for (size_t i = 0; i < len; ++i) { |
857 | 586M | histo.FastAdd(tmp_tokens[i]); |
858 | 586M | } |
859 | 522k | histo.Condition(); |
860 | 522k | JXL_ASSIGN_OR_RETURN(float cost, histo.ANSPopulationCost()); |
861 | 522k | cost += extra_bits; |
862 | | // Add signaling cost of the hybriduintconfig itself. |
863 | 522k | cost += CeilLog2Nonzero(cfg.split_exponent + 1); |
864 | 522k | cost += CeilLog2Nonzero(cfg.split_exponent - cfg.msb_in_token + 1); |
865 | 522k | if (cost < best_cost) { |
866 | 179k | uint_config[h] = cfg; |
867 | 179k | best_cost = cost; |
868 | 179k | clustered_histograms[h].swap(histo); |
869 | 179k | } |
870 | 522k | } |
871 | 87.2k | } |
872 | | |
873 | 10.4k | size_t max_tok = 0; |
874 | 97.7k | for (size_t h = 0; h < num_histo; ++h) { |
875 | 87.2k | Histogram& histo = clustered_histograms[h]; |
876 | 87.2k | max_tok = std::max(max_tok, histo.MaxSymbol()); |
877 | 87.2k | size_t len = histo_volume[num_histo + h]; |
878 | 87.2k | if (len == 0) continue; // E.g. when lz77 not enabled |
879 | 729 | size_t max_histo_tok = max_value_per_histo[num_histo + h]; |
880 | 729 | uint32_t tok, nbits, bits; |
881 | 729 | lz77.length_uint_config.Encode(max_histo_tok, &tok, &nbits, &bits); |
882 | 729 | tok |= lz77.length_uint_config.LsbMask(); |
883 | 729 | tok += lz77.min_symbol; |
884 | 729 | histo.EnsureCapacity(tok + 1); |
885 | 729 | uint32_t* data = transposed.data() + histo_offset[num_histo + h]; |
886 | 729 | uint32_t unused = |
887 | 729 | EstimateTokenCost(data, len, lz77.length_uint_config, tmp); |
888 | 729 | (void)unused; |
889 | 729 | uint32_t* tmp_tokens = tmp.address<uint32_t>(); |
890 | 8.89k | for (size_t i = 0; i < len; ++i) { |
891 | 8.16k | histo.FastAdd(tmp_tokens[i] + lz77.min_symbol); |
892 | 8.16k | } |
893 | 729 | histo.Condition(); |
894 | 729 | max_tok = std::max(max_tok, histo.MaxSymbol()); |
895 | 729 | } |
896 | | |
897 | | // `log_alpha_size - 5` is encoded in the header, so min is 5. |
898 | 10.4k | size_t log_size = 5; |
899 | 17.5k | while (max_tok >= (1u << log_size)) ++log_size; |
900 | | |
901 | 10.4k | size_t max_log_alpha_size = use_prefix_code ? PREFIX_MAX_BITS : 8; |
902 | 10.4k | JXL_ENSURE(log_size <= max_log_alpha_size); |
903 | | |
904 | 10.4k | if (use_prefix_code) { |
905 | 3.40k | log_alpha_size = PREFIX_MAX_BITS; |
906 | 7.08k | } else { |
907 | 7.08k | log_alpha_size = log_size; |
908 | 7.08k | } |
909 | | |
910 | 10.4k | return true; |
911 | 10.4k | } |
912 | | |
// Clusters the histograms in `builder`, chooses the hybrid uint configs and
// builds the encoding info for every new clustered histogram. If `writer` is
// not null, the context map, configs and histograms are also written to it.
// NOTE: `layer` is only for clustered_entropy; caller does ReclaimAndCharge.
// Returns cost (in bits).
StatusOr<size_t> EntropyEncodingData::BuildAndStoreEntropyCodes(
    JxlMemoryManager* memory_manager, const HistogramParams& params,
    const std::vector<std::vector<Token>>& tokens,
    const std::vector<Histogram>& builder, BitWriter* writer, LayerType layer,
    AuxOut* aux_out) {
  // Histograms that already have encoding info are rebuilt from it so that
  // clustering can take them into account.
  const size_t prev_histograms = encoding_info.size();
  std::vector<Histogram> clustered_histograms;
  for (size_t i = 0; i < prev_histograms; ++i) {
    clustered_histograms.push_back(
        HistogramFromSymbolInfo(encoding_info[i], use_prefix_code));
  }
  // New contexts are appended after the existing context map entries.
  size_t context_offset = context_map.size();
  context_map.resize(context_offset + builder.size());
  if (builder.size() > 1) {
    if (!ans_fuzzer_friendly_) {
      std::vector<uint32_t> histogram_symbols;
      JXL_RETURN_IF_ERROR(ClusterHistograms(params, builder, kClustersLimit,
                                            &clustered_histograms,
                                            &histogram_symbols));
      for (size_t c = 0; c < builder.size(); ++c) {
        context_map[context_offset + c] =
            static_cast<uint8_t>(histogram_symbols[c]);
      }
    } else {
      // Fuzzer-friendly mode: a single histogram in which every symbol of a
      // power-of-two sized alphabet is added exactly once.
      JXL_ENSURE(encoding_info.empty());
      std::fill(context_map.begin(), context_map.end(), 0);
      size_t max_symbol = 0;
      for (const Histogram& h : builder) {
        max_symbol = std::max(h.counts.size(), max_symbol);
      }
      size_t num_symbols = 1 << CeilLog2Nonzero(max_symbol + 1);
      clustered_histograms.resize(1);
      clustered_histograms[0].Clear();
      for (size_t i = 0; i < num_symbols; i++) {
        clustered_histograms[0].Add(i);
      }
    }
    if (writer != nullptr) {
      JXL_RETURN_IF_ERROR(EncodeContextMap(
          context_map, clustered_histograms.size(), writer, layer, aux_out));
    }
  } else {
    // A single input histogram needs neither clustering nor a context map.
    JXL_ENSURE(encoding_info.empty());
    clustered_histograms.push_back(builder[0]);
  }
  if (aux_out != nullptr) {
    for (size_t i = prev_histograms; i < clustered_histograms.size(); ++i) {
      aux_out->layer(layer).clustered_entropy +=
          clustered_histograms[i].ShannonEntropy();
    }
  }

  JXL_RETURN_IF_ERROR(
      ChooseUintConfigs(memory_manager, params, tokens, clustered_histograms));

  SizeWriter size_writer;  // Used if writer == nullptr to estimate costs.
  // 1 bit for the `use_prefix_code` flag, plus 2 bits for `log_alpha_size - 5`
  // when ANS is used.
  size_t cost = use_prefix_code ? 1 : 3;

  // The order of the writes below defines the bitstream layout.
  if (writer) writer->Write(1, TO_JXL_BOOL(use_prefix_code));
  if (writer == nullptr) {
    EncodeUintConfigs(uint_config, &size_writer, log_alpha_size);
  } else {
    if (!use_prefix_code) writer->Write(2, log_alpha_size - 5);
    EncodeUintConfigs(uint_config, writer, log_alpha_size);
  }
  if (use_prefix_code) {
    for (const auto& histo : clustered_histograms) {
      // An empty alphabet is stored as size 1 so that `size - 1` is valid.
      size_t alphabet_size = std::max<size_t>(1, histo.alphabet_size());
      if (writer) {
        StoreVarLenUint16(alphabet_size - 1, writer);
      } else {
        StoreVarLenUint16(alphabet_size - 1, &size_writer);
      }
    }
  }
  // NOTE(review): `size_writer` is only fed when `writer` is null, so with a
  // real writer the uint-config / alphabet-size bits do not appear to be
  // included in the returned cost - confirm this is intended.
  cost += size_writer.size;
  for (size_t c = prev_histograms; c < clustered_histograms.size(); ++c) {
    size_t alphabet_size = clustered_histograms[c].alphabet_size();
    encoding_info.emplace_back();
    encoding_info.back().resize(alphabet_size);
    BitWriter* histo_writer = writer;
    if (params.streaming_mode) {
      // In streaming mode each histogram is also kept in its own writer.
      encoded_histograms.emplace_back(memory_manager);
      histo_writer = &encoded_histograms.back();
    }
    const auto& body = [&]() -> Status {
      JXL_ASSIGN_OR_RETURN(size_t ans_cost,
                           BuildAndStoreANSEncodingData(
                               memory_manager, params.ans_histogram_strategy,
                               clustered_histograms[c], histo_writer));
      cost += ans_cost;
      return true;
    };
    if (histo_writer) {
      JXL_RETURN_IF_ERROR(histo_writer->WithMaxBits(
          256 + alphabet_size * 24, layer, aux_out, body,
          /*finished_histogram=*/true));
    } else {
      JXL_RETURN_IF_ERROR(body());
    }
    if (params.streaming_mode) {
      // NOTE(review): `writer` is dereferenced unconditionally here; this
      // presumably relies on streaming mode always supplying a writer -
      // verify against callers.
      JXL_RETURN_IF_ERROR(writer->AppendUnaligned(*histo_writer));
    }
  }
  return cost;
}
1021 | | |
1022 | | template <typename Writer> |
1023 | | void EncodeUintConfig(const HybridUintConfig uint_config, Writer* writer, |
1024 | 203k | size_t log_alpha_size) { |
1025 | 203k | writer->Write(CeilLog2Nonzero(log_alpha_size + 1), |
1026 | 203k | uint_config.split_exponent); |
1027 | 203k | if (uint_config.split_exponent == log_alpha_size) { |
1028 | 61 | return; // msb/lsb don't matter. |
1029 | 61 | } |
1030 | 202k | size_t nbits = CeilLog2Nonzero(uint_config.split_exponent + 1); |
1031 | 202k | writer->Write(nbits, uint_config.msb_in_token); |
1032 | 202k | nbits = CeilLog2Nonzero(uint_config.split_exponent - |
1033 | 202k | uint_config.msb_in_token + 1); |
1034 | 202k | writer->Write(nbits, uint_config.lsb_in_token); |
1035 | 202k | } void jxl::EncodeUintConfig<jxl::SizeWriter>(jxl::HybridUintConfig, jxl::SizeWriter*, unsigned long) Line | Count | Source | 1024 | 25.1k | size_t log_alpha_size) { | 1025 | 25.1k | writer->Write(CeilLog2Nonzero(log_alpha_size + 1), | 1026 | 25.1k | uint_config.split_exponent); | 1027 | 25.1k | if (uint_config.split_exponent == log_alpha_size) { | 1028 | 0 | return; // msb/lsb don't matter. | 1029 | 0 | } | 1030 | 25.1k | size_t nbits = CeilLog2Nonzero(uint_config.split_exponent + 1); | 1031 | 25.1k | writer->Write(nbits, uint_config.msb_in_token); | 1032 | 25.1k | nbits = CeilLog2Nonzero(uint_config.split_exponent - | 1033 | 25.1k | uint_config.msb_in_token + 1); | 1034 | 25.1k | writer->Write(nbits, uint_config.lsb_in_token); | 1035 | 25.1k | } |
void jxl::EncodeUintConfig<jxl::BitWriter>(jxl::HybridUintConfig, jxl::BitWriter*, unsigned long) Line | Count | Source | 1024 | 177k | size_t log_alpha_size) { | 1025 | 177k | writer->Write(CeilLog2Nonzero(log_alpha_size + 1), | 1026 | 177k | uint_config.split_exponent); | 1027 | 177k | if (uint_config.split_exponent == log_alpha_size) { | 1028 | 61 | return; // msb/lsb don't matter. | 1029 | 61 | } | 1030 | 177k | size_t nbits = CeilLog2Nonzero(uint_config.split_exponent + 1); | 1031 | 177k | writer->Write(nbits, uint_config.msb_in_token); | 1032 | 177k | nbits = CeilLog2Nonzero(uint_config.split_exponent - | 1033 | 177k | uint_config.msb_in_token + 1); | 1034 | 177k | writer->Write(nbits, uint_config.lsb_in_token); | 1035 | 177k | } |
|
1036 | | template <typename Writer> |
1037 | | void EncodeUintConfigs(const std::vector<HybridUintConfig>& uint_config, |
1038 | 41.8k | Writer* writer, size_t log_alpha_size) { |
1039 | | // TODO(veluca): RLE? |
1040 | 200k | for (const auto& cfg : uint_config) { |
1041 | 200k | EncodeUintConfig(cfg, writer, log_alpha_size); |
1042 | 200k | } |
1043 | 41.8k | } void jxl::EncodeUintConfigs<jxl::BitWriter>(std::__1::vector<jxl::HybridUintConfig, std::__1::allocator<jxl::HybridUintConfig> > const&, jxl::BitWriter*, unsigned long) Line | Count | Source | 1038 | 19.5k | Writer* writer, size_t log_alpha_size) { | 1039 | | // TODO(veluca): RLE? | 1040 | 176k | for (const auto& cfg : uint_config) { | 1041 | 176k | EncodeUintConfig(cfg, writer, log_alpha_size); | 1042 | 176k | } | 1043 | 19.5k | } |
void jxl::EncodeUintConfigs<jxl::SizeWriter>(std::__1::vector<jxl::HybridUintConfig, std::__1::allocator<jxl::HybridUintConfig> > const&, jxl::SizeWriter*, unsigned long) Line | Count | Source | 1038 | 22.2k | Writer* writer, size_t log_alpha_size) { | 1039 | | // TODO(veluca): RLE? | 1040 | 23.5k | for (const auto& cfg : uint_config) { | 1041 | 23.5k | EncodeUintConfig(cfg, writer, log_alpha_size); | 1042 | 23.5k | } | 1043 | 22.2k | } |
|
1044 | | template void EncodeUintConfigs(const std::vector<HybridUintConfig>&, |
1045 | | BitWriter*, size_t); |
1046 | | |
1047 | | Status EncodeHistograms(const EntropyEncodingData& codes, BitWriter* writer, |
1048 | 0 | LayerType layer, AuxOut* aux_out) { |
1049 | 0 | return writer->WithMaxBits( |
1050 | 0 | 128 + kClustersLimit * 136, layer, aux_out, |
1051 | 0 | [&]() -> Status { |
1052 | 0 | JXL_RETURN_IF_ERROR(Bundle::Write(codes.lz77, writer, layer, aux_out)); |
1053 | 0 | if (codes.lz77.enabled) { |
1054 | 0 | EncodeUintConfig(codes.lz77.length_uint_config, writer, |
1055 | 0 | /*log_alpha_size=*/8); |
1056 | 0 | } |
1057 | 0 | JXL_RETURN_IF_ERROR(EncodeContextMap(codes.context_map, |
1058 | 0 | codes.encoding_info.size(), writer, |
1059 | 0 | layer, aux_out)); |
1060 | 0 | writer->Write(1, TO_JXL_BOOL(codes.use_prefix_code)); |
1061 | 0 | size_t log_alpha_size = 8; |
1062 | 0 | if (codes.use_prefix_code) { |
1063 | 0 | log_alpha_size = PREFIX_MAX_BITS; |
1064 | 0 | } else { |
1065 | 0 | log_alpha_size = 8; // streaming_mode |
1066 | 0 | writer->Write(2, log_alpha_size - 5); |
1067 | 0 | } |
1068 | 0 | EncodeUintConfigs(codes.uint_config, writer, log_alpha_size); |
1069 | 0 | if (codes.use_prefix_code) { |
1070 | 0 | for (const auto& info : codes.encoding_info) { |
1071 | 0 | StoreVarLenUint16(info.size() - 1, writer); |
1072 | 0 | } |
1073 | 0 | } |
1074 | 0 | for (const auto& histo_writer : codes.encoded_histograms) { |
1075 | 0 | JXL_RETURN_IF_ERROR(writer->AppendUnaligned(histo_writer)); |
1076 | 0 | } |
1077 | 0 | return true; |
1078 | 0 | }, |
1079 | 0 | /*finished_histogram=*/true); |
1080 | 0 | } |
1081 | | |
1082 | | StatusOr<size_t> BuildAndEncodeHistograms( |
1083 | | JxlMemoryManager* memory_manager, const HistogramParams& params, |
1084 | | size_t num_contexts, std::vector<std::vector<Token>>& tokens, |
1085 | | EntropyEncodingData* codes, BitWriter* writer, LayerType layer, |
1086 | 41.8k | AuxOut* aux_out) { |
1087 | | // TODO(Ivan): presumably not needed - default |
1088 | | // if (params.initialize_global_state) codes->lz77.enabled = false; |
1089 | 41.8k | codes->lz77.nonserialized_distance_context = num_contexts; |
1090 | 41.8k | codes->lz77.min_symbol = params.force_huffman ? 512 : 224; |
1091 | 41.8k | std::vector<std::vector<Token>> tokens_lz77 = |
1092 | 41.8k | ApplyLZ77(params, num_contexts, tokens, codes->lz77); |
1093 | 41.8k | if (!tokens_lz77.empty()) codes->lz77.enabled = true; |
1094 | 41.8k | if (ans_fuzzer_friendly_) { |
1095 | 0 | codes->lz77.length_uint_config = HybridUintConfig(10, 0, 0); |
1096 | 0 | codes->lz77.min_symbol = 2048; |
1097 | 0 | } |
1098 | | |
1099 | 41.8k | size_t cost = 0; |
1100 | 41.8k | const size_t max_contexts = std::min(num_contexts, kClustersLimit); |
1101 | 41.8k | const auto& body = [&]() -> Status { |
1102 | 41.8k | if (writer) { |
1103 | 19.5k | JXL_RETURN_IF_ERROR(Bundle::Write(codes->lz77, writer, layer, aux_out)); |
1104 | 22.2k | } else { |
1105 | 22.2k | size_t ebits, bits; |
1106 | 22.2k | JXL_RETURN_IF_ERROR(Bundle::CanEncode(codes->lz77, &ebits, &bits)); |
1107 | 22.2k | cost += bits; |
1108 | 22.2k | } |
1109 | 41.8k | if (codes->lz77.enabled) { |
1110 | 2.83k | if (writer) { |
1111 | 1.17k | size_t b = writer->BitsWritten(); |
1112 | 1.17k | EncodeUintConfig(codes->lz77.length_uint_config, writer, |
1113 | 1.17k | /*log_alpha_size=*/8); |
1114 | 1.17k | cost += writer->BitsWritten() - b; |
1115 | 1.66k | } else { |
1116 | 1.66k | SizeWriter size_writer; |
1117 | 1.66k | EncodeUintConfig(codes->lz77.length_uint_config, &size_writer, |
1118 | 1.66k | /*log_alpha_size=*/8); |
1119 | 1.66k | cost += size_writer.size; |
1120 | 1.66k | } |
1121 | 2.83k | num_contexts += 1; |
1122 | 2.83k | tokens = std::move(tokens_lz77); |
1123 | 2.83k | } |
1124 | 41.8k | size_t total_tokens = 0; |
1125 | | // Build histograms. |
1126 | 41.8k | std::vector<Histogram> builder(num_contexts); |
1127 | 41.8k | HybridUintConfig uint_config = params.UintConfig(); |
1128 | 41.8k | if (ans_fuzzer_friendly_) { |
1129 | 0 | uint_config = HybridUintConfig(10, 0, 0); |
1130 | 0 | } |
1131 | 133k | for (const auto& stream : tokens) { |
1132 | 133k | if (codes->lz77.enabled) { |
1133 | 3.16M | for (const auto& token : stream) { |
1134 | 3.16M | total_tokens++; |
1135 | 3.16M | uint32_t tok, nbits, bits; |
1136 | 3.16M | (token.is_lz77_length ? codes->lz77.length_uint_config : uint_config) |
1137 | 3.16M | .Encode(token.value, &tok, &nbits, &bits); |
1138 | 3.16M | tok += token.is_lz77_length ? codes->lz77.min_symbol : 0; |
1139 | 3.16M | JXL_DASSERT(token.context < num_contexts); |
1140 | 3.16M | builder[token.context].Add(tok); |
1141 | 3.16M | } |
1142 | 130k | } else if (num_contexts == 1) { |
1143 | 10.6M | for (const auto& token : stream) { |
1144 | 10.6M | total_tokens++; |
1145 | 10.6M | uint32_t tok, nbits, bits; |
1146 | 10.6M | uint_config.Encode(token.value, &tok, &nbits, &bits); |
1147 | 10.6M | builder[0].Add(tok); |
1148 | 10.6M | } |
1149 | 104k | } else { |
1150 | 305M | for (const auto& token : stream) { |
1151 | 305M | total_tokens++; |
1152 | 305M | uint32_t tok, nbits, bits; |
1153 | 305M | uint_config.Encode(token.value, &tok, &nbits, &bits); |
1154 | 305M | JXL_DASSERT(token.context < num_contexts); |
1155 | 305M | builder[token.context].Add(tok); |
1156 | 305M | } |
1157 | 104k | } |
1158 | 133k | } |
1159 | | |
1160 | 41.8k | if (params.add_missing_symbols) { |
1161 | 0 | for (size_t c = 0; c < num_contexts; ++c) { |
1162 | 0 | for (int symbol = 0; symbol < ANS_MAX_ALPHABET_SIZE; ++symbol) { |
1163 | 0 | builder[c].Add(symbol); |
1164 | 0 | } |
1165 | 0 | } |
1166 | 0 | } |
1167 | | |
1168 | 41.8k | if (params.initialize_global_state) { |
1169 | 41.8k | bool use_prefix_code = |
1170 | 41.8k | params.force_huffman || total_tokens < 100 || |
1171 | 14.6k | params.clustering == HistogramParams::ClusteringType::kFastest || |
1172 | 14.6k | ans_fuzzer_friendly_; |
1173 | 41.8k | if (!use_prefix_code) { |
1174 | 14.6k | bool all_singleton = true; |
1175 | 10.1M | for (size_t i = 0; i < num_contexts; i++) { |
1176 | 10.1M | if (builder[i].ShannonEntropy() >= 1e-5) { |
1177 | 1.33M | all_singleton = false; |
1178 | 1.33M | } |
1179 | 10.1M | } |
1180 | 14.6k | if (all_singleton) { |
1181 | 113 | use_prefix_code = true; |
1182 | 113 | } |
1183 | 14.6k | } |
1184 | 41.8k | codes->use_prefix_code = use_prefix_code; |
1185 | 41.8k | } |
1186 | | |
1187 | 41.8k | if (params.add_fixed_histograms) { |
1188 | | // TODO(szabadka) Add more fixed histograms. |
1189 | | // TODO(szabadka) Reduce alphabet size by choosing a non-default |
1190 | | // uint_config. |
1191 | 0 | const size_t alphabet_size = ANS_MAX_ALPHABET_SIZE; |
1192 | 0 | codes->log_alpha_size = 8; |
1193 | 0 | JXL_ENSURE(alphabet_size == 1u << codes->log_alpha_size); |
1194 | 0 | static_assert(ANS_MAX_ALPHABET_SIZE <= ANS_TAB_SIZE, |
1195 | 0 | "Alphabet does not fit table"); |
1196 | 0 | codes->encoding_info.emplace_back(); |
1197 | 0 | codes->encoding_info.back().resize(alphabet_size); |
1198 | 0 | codes->encoded_histograms.emplace_back(memory_manager); |
1199 | 0 | BitWriter* histo_writer = &codes->encoded_histograms.back(); |
1200 | 0 | JXL_RETURN_IF_ERROR(histo_writer->WithMaxBits( |
1201 | 0 | 256 + alphabet_size * 24, LayerType::Header, nullptr, |
1202 | 0 | [&]() -> Status { |
1203 | 0 | JXL_ASSIGN_OR_RETURN( |
1204 | 0 | size_t ans_cost, |
1205 | 0 | codes->BuildAndStoreANSEncodingData( |
1206 | 0 | memory_manager, params.ans_histogram_strategy, |
1207 | 0 | Histogram::Flat(alphabet_size, ANS_TAB_SIZE), |
1208 | 0 | histo_writer)); |
1209 | 0 | (void)ans_cost; |
1210 | 0 | return true; |
1211 | 0 | })); |
1212 | 0 | } |
1213 | | |
1214 | | // Encode histograms. |
1215 | 41.8k | JXL_ASSIGN_OR_RETURN( |
1216 | 41.8k | size_t entropy_bits, |
1217 | 41.8k | codes->BuildAndStoreEntropyCodes(memory_manager, params, tokens, |
1218 | 41.8k | builder, writer, layer, aux_out)); |
1219 | 41.8k | cost += entropy_bits; |
1220 | 41.8k | return true; |
1221 | 41.8k | }; |
1222 | 41.8k | if (writer) { |
1223 | 19.5k | JXL_RETURN_IF_ERROR(writer->WithMaxBits( |
1224 | 19.5k | 128 + num_contexts * 40 + max_contexts * 96, layer, aux_out, body, |
1225 | 19.5k | /*finished_histogram=*/true)); |
1226 | 22.2k | } else { |
1227 | 22.2k | JXL_RETURN_IF_ERROR(body()); |
1228 | 22.2k | } |
1229 | | |
1230 | 41.8k | if (aux_out != nullptr) { |
1231 | 0 | aux_out->layer(layer).num_clustered_histograms += |
1232 | 0 | codes->encoding_info.size(); |
1233 | 0 | } |
1234 | 41.8k | return cost; |
1235 | 41.8k | } |
1236 | | |
1237 | | size_t WriteTokens(const std::vector<Token>& tokens, |
1238 | | const EntropyEncodingData& codes, size_t context_offset, |
1239 | 30.1k | BitWriter* writer) { |
1240 | 30.1k | size_t num_extra_bits = 0; |
1241 | 30.1k | if (codes.use_prefix_code) { |
1242 | 694k | for (const auto& token : tokens) { |
1243 | 694k | uint32_t tok, nbits, bits; |
1244 | 694k | size_t histo = codes.context_map[context_offset + token.context]; |
1245 | 694k | (token.is_lz77_length ? codes.lz77.length_uint_config |
1246 | 694k | : codes.uint_config[histo]) |
1247 | 694k | .Encode(token.value, &tok, &nbits, &bits); |
1248 | 694k | tok += token.is_lz77_length ? codes.lz77.min_symbol : 0; |
1249 | | // Combine two calls to the BitWriter. Equivalent to: |
1250 | | // writer->Write(codes.encoding_info[histo][tok].depth, |
1251 | | // codes.encoding_info[histo][tok].bits); |
1252 | | // writer->Write(nbits, bits); |
1253 | 694k | uint64_t data = codes.encoding_info[histo][tok].bits; |
1254 | 694k | data |= static_cast<uint64_t>(bits) |
1255 | 694k | << codes.encoding_info[histo][tok].depth; |
1256 | 694k | writer->Write(codes.encoding_info[histo][tok].depth + nbits, data); |
1257 | 694k | num_extra_bits += nbits; |
1258 | 694k | } |
1259 | 10.2k | return num_extra_bits; |
1260 | 10.2k | } |
1261 | 19.9k | std::vector<uint64_t> out; |
1262 | 19.9k | std::vector<uint8_t> out_nbits; |
1263 | 19.9k | out.reserve(tokens.size()); |
1264 | 19.9k | out_nbits.reserve(tokens.size()); |
1265 | 19.9k | uint64_t allbits = 0; |
1266 | 19.9k | size_t numallbits = 0; |
1267 | | // Writes in *reversed* order. |
1268 | 618M | auto addbits = [&](size_t bits, size_t nbits) { |
1269 | 618M | if (JXL_UNLIKELY(nbits)) { |
1270 | 97.5M | JXL_DASSERT(bits >> nbits == 0); |
1271 | 97.5M | if (JXL_UNLIKELY(numallbits + nbits > BitWriter::kMaxBitsPerCall)) { |
1272 | 21.4M | out.push_back(allbits); |
1273 | 21.4M | out_nbits.push_back(numallbits); |
1274 | 21.4M | numallbits = allbits = 0; |
1275 | 21.4M | } |
1276 | 97.5M | allbits <<= nbits; |
1277 | 97.5M | allbits |= bits; |
1278 | 97.5M | numallbits += nbits; |
1279 | 97.5M | } |
1280 | 618M | }; |
1281 | 19.9k | const int end = tokens.size(); |
1282 | 19.9k | ANSCoder ans; |
1283 | 19.9k | if (codes.lz77.enabled || codes.context_map.size() > 1) { |
1284 | 305M | for (int i = end - 1; i >= 0; --i) { |
1285 | 305M | const Token token = tokens[i]; |
1286 | 305M | const uint8_t histo = codes.context_map[context_offset + token.context]; |
1287 | 305M | uint32_t tok, nbits, bits; |
1288 | 305M | (token.is_lz77_length ? codes.lz77.length_uint_config |
1289 | 305M | : codes.uint_config[histo]) |
1290 | 305M | .Encode(tokens[i].value, &tok, &nbits, &bits); |
1291 | 305M | tok += token.is_lz77_length ? codes.lz77.min_symbol : 0; |
1292 | 305M | const ANSEncSymbolInfo& info = codes.encoding_info[histo][tok]; |
1293 | 305M | JXL_DASSERT(info.freq_ > 0); |
1294 | | // Extra bits first as this is reversed. |
1295 | 305M | addbits(bits, nbits); |
1296 | 305M | num_extra_bits += nbits; |
1297 | 305M | uint8_t ans_nbits = 0; |
1298 | 305M | uint32_t ans_bits = ans.PutSymbol(info, &ans_nbits); |
1299 | 305M | addbits(ans_bits, ans_nbits); |
1300 | 305M | } |
1301 | 18.9k | } else { |
1302 | 3.17M | for (int i = end - 1; i >= 0; --i) { |
1303 | 3.17M | uint32_t tok, nbits, bits; |
1304 | 3.17M | codes.uint_config[0].Encode(tokens[i].value, &tok, &nbits, &bits); |
1305 | 3.17M | const ANSEncSymbolInfo& info = codes.encoding_info[0][tok]; |
1306 | | // Extra bits first as this is reversed. |
1307 | 3.17M | addbits(bits, nbits); |
1308 | 3.17M | num_extra_bits += nbits; |
1309 | 3.17M | uint8_t ans_nbits = 0; |
1310 | 3.17M | uint32_t ans_bits = ans.PutSymbol(info, &ans_nbits); |
1311 | 3.17M | addbits(ans_bits, ans_nbits); |
1312 | 3.17M | } |
1313 | 1.01k | } |
1314 | 19.9k | const uint32_t state = ans.GetState(); |
1315 | 19.9k | writer->Write(32, state); |
1316 | 19.9k | writer->Write(numallbits, allbits); |
1317 | 21.4M | for (int i = out.size(); i > 0; --i) { |
1318 | 21.4M | writer->Write(out_nbits[i - 1], out[i - 1]); |
1319 | 21.4M | } |
1320 | 19.9k | return num_extra_bits; |
1321 | 30.1k | } |
1322 | | |
1323 | | Status WriteTokens(const std::vector<Token>& tokens, |
1324 | | const EntropyEncodingData& codes, size_t context_offset, |
1325 | 24.1k | BitWriter* writer, LayerType layer, AuxOut* aux_out) { |
1326 | | // Theoretically, we could have 15 prefix code bits + 31 extra bits. |
1327 | 24.1k | return writer->WithMaxBits( |
1328 | 24.1k | 46 * tokens.size() + 32 * 1024 * 4, layer, aux_out, [&] { |
1329 | 24.1k | size_t num_extra_bits = |
1330 | 24.1k | WriteTokens(tokens, codes, context_offset, writer); |
1331 | 24.1k | if (aux_out != nullptr) { |
1332 | 0 | aux_out->layer(layer).extra_bits += num_extra_bits; |
1333 | 0 | } |
1334 | 24.1k | return true; |
1335 | 24.1k | }); |
1336 | 24.1k | } |
1337 | | |
// Requests the fuzzer-friendly ANS mode. The flag can only be changed in
// debug builds; in all other builds the flag is a compile-time constant and
// the call has no effect.
void SetANSFuzzerFriendly(bool ans_fuzzer_friendly) {
#if JXL_IS_DEBUG_BUILD  // Guard against accidental / malicious changes.
  ans_fuzzer_friendly_ = ans_fuzzer_friendly;
#else
  // Intentionally ignored outside debug builds.
  static_cast<void>(ans_fuzzer_friendly);
#endif
}
1343 | | |
1344 | | HistogramParams HistogramParams::ForModular( |
1345 | | const CompressParams& cparams, |
1346 | 7.86k | const std::vector<uint8_t>& extra_dc_precision, bool streaming_mode) { |
1347 | 7.86k | HistogramParams params; |
1348 | 7.86k | params.streaming_mode = streaming_mode; |
1349 | 7.86k | if (cparams.speed_tier > SpeedTier::kKitten) { |
1350 | 7.86k | params.clustering = HistogramParams::ClusteringType::kFast; |
1351 | 7.86k | params.ans_histogram_strategy = |
1352 | 7.86k | cparams.speed_tier > SpeedTier::kThunder |
1353 | 7.86k | ? HistogramParams::ANSHistogramStrategy::kFast |
1354 | 7.86k | : HistogramParams::ANSHistogramStrategy::kApproximate; |
1355 | 7.86k | params.lz77_method = |
1356 | 7.86k | cparams.modular_mode && cparams.speed_tier <= SpeedTier::kHare |
1357 | 7.86k | ? HistogramParams::LZ77Method::kRLE |
1358 | 7.86k | : HistogramParams::LZ77Method::kNone; |
1359 | | // Near-lossless DC, as well as modular mode, require choosing hybrid uint |
1360 | | // more carefully. |
1361 | 7.86k | if ((!extra_dc_precision.empty() && extra_dc_precision[0] != 0) || |
1362 | 4.84k | (cparams.modular_mode && cparams.speed_tier < SpeedTier::kCheetah)) { |
1363 | 4.84k | params.uint_method = HistogramParams::HybridUintMethod::kFast; |
1364 | 4.84k | } else { |
1365 | 3.02k | params.uint_method = HistogramParams::HybridUintMethod::kNone; |
1366 | 3.02k | } |
1367 | 7.86k | } else if (cparams.speed_tier <= SpeedTier::kTortoise) { |
1368 | 0 | params.lz77_method = HistogramParams::LZ77Method::kOptimal; |
1369 | 0 | } else { |
1370 | 0 | params.lz77_method = HistogramParams::LZ77Method::kLZ77; |
1371 | 0 | } |
1372 | 7.86k | if (cparams.decoding_speed_tier >= 2) { |
1373 | 0 | params.max_histograms = 12; |
1374 | 0 | } |
1375 | | // No predictor requires LZ77 to compress residuals. |
1376 | | // Effort 3 and lower have forced predictors, so kNone is set. |
1377 | 7.86k | if (cparams.options.predictor == Predictor::Zero && cparams.modular_mode) { |
1378 | 0 | params.lz77_method = cparams.speed_tier >= SpeedTier::kFalcon |
1379 | 0 | ? HistogramParams::LZ77Method::kNone |
1380 | 0 | : cparams.speed_tier >= SpeedTier::kHare |
1381 | 0 | ? HistogramParams::LZ77Method::kRLE |
1382 | 0 | : cparams.speed_tier >= SpeedTier::kKitten |
1383 | 0 | ? HistogramParams::LZ77Method::kLZ77 |
1384 | 0 | : HistogramParams::LZ77Method::kOptimal; |
1385 | 0 | } |
1386 | 7.86k | return params; |
1387 | 7.86k | } |
1388 | | } // namespace jxl |