/src/libjxl/lib/jxl/ans_common.h
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #ifndef LIB_JXL_ANS_COMMON_H_ |
7 | | #define LIB_JXL_ANS_COMMON_H_ |
8 | | |
9 | | #include <algorithm> |
10 | | #include <cstddef> |
11 | | #include <cstdint> |
12 | | #include <cstring> |
13 | | #include <hwy/base.h> |
14 | | #include <hwy/cache_control.h> // Prefetch |
15 | | #include <vector> |
16 | | |
17 | | #include "lib/jxl/ans_params.h" |
18 | | #include "lib/jxl/base/byte_order.h" |
19 | | #include "lib/jxl/base/compiler_specific.h" |
20 | | #include "lib/jxl/base/status.h" |
21 | | |
22 | | namespace jxl { |
23 | | |
24 | | // Returns the precision (number of bits) that should be used to store |
25 | | // a histogram count such that Log2Floor(count) == logcount. |
26 | | static JXL_MAYBE_UNUSED JXL_INLINE uint32_t |
27 | 24.4M | GetPopulationCountPrecision(uint32_t logcount, uint32_t shift) { |
28 | 24.4M | int32_t r = std::min<int>( |
29 | 24.4M | logcount, static_cast<int>(shift) - |
30 | 24.4M | static_cast<int>((ANS_LOG_TAB_SIZE - logcount) >> 1)); |
31 | 24.4M | if (r < 0) return 0; |
32 | 21.5M | return r; |
33 | 24.4M | } Unexecuted instantiation: encode.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_icc_codec.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) enc_ans.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Line | Count | Source | 27 | 24.3M | GetPopulationCountPrecision(uint32_t logcount, uint32_t shift) { | 28 | 24.3M | int32_t r = std::min<int>( | 29 | 24.3M | logcount, static_cast<int>(shift) - | 30 | 24.3M | static_cast<int>((ANS_LOG_TAB_SIZE - logcount) >> 1)); | 31 | 24.3M | if (r < 0) return 0; | 32 | 21.5M | return r; | 33 | 24.3M | } |
Unexecuted instantiation: enc_cluster.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_context_map.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_lz77.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_fast_lossless.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_frame.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_modular.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_patch_dictionary.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_dot_dictionary.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_detect_dots.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_debug_image.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_quant_weights.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_coeff_order.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_heuristics.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_cache.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_group.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_splines.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_ac_strategy.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_entropy_coder.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_jpeg_data.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_encoding.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_ma.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_transform.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_rct.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_palette.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: ans_common.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: coeff_order.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) dec_ans.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Line | Count | Source | 27 | 106k | GetPopulationCountPrecision(uint32_t logcount, uint32_t shift) { | 28 | 106k | int32_t r = std::min<int>( | 29 | 106k | logcount, static_cast<int>(shift) - | 30 | 106k | static_cast<int>((ANS_LOG_TAB_SIZE - logcount) >> 1)); | 31 | 106k | if (r < 0) return 0; | 32 | 17.7k | return r; | 33 | 106k | } |
Unexecuted instantiation: dec_cache.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_context_map.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_external_image.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_frame.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_group.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_modular.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_noise.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_patch_dictionary.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: decode.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: epf.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: icc_codec.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_ma.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: encoding.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: modular_image.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: transform.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: rct.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: palette.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: quant_weights.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: stage_blending.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: stage_epf.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: stage_write.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: splines.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) |
34 | | |
35 | | // Returns a histogram where the counts are positive, differ by at most 1, |
36 | | // and add up to total_count. The bigger counts (if any) are at the beginning |
37 | | // of the histogram. |
38 | | static JXL_MAYBE_UNUSED JXL_INLINE std::vector<int32_t> CreateFlatHistogram( |
39 | 85.0k | int length, int total_count) { |
40 | 85.0k | JXL_DASSERT(length > 0); |
41 | 85.0k | JXL_DASSERT(length <= total_count); |
42 | 85.0k | const int count = total_count / length; |
43 | 85.0k | std::vector<int32_t> result(length, count); |
44 | 85.0k | const int rem_counts = total_count % length; |
45 | 1.40M | for (int i = 0; i < rem_counts; ++i) { |
46 | 1.31M | ++result[i]; |
47 | 1.31M | } |
48 | 85.0k | return result; |
49 | 85.0k | } Unexecuted instantiation: encode.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_icc_codec.cc:jxl::CreateFlatHistogram(int, int) enc_ans.cc:jxl::CreateFlatHistogram(int, int) Line | Count | Source | 39 | 69.9k | int length, int total_count) { | 40 | 69.9k | JXL_DASSERT(length > 0); | 41 | 69.9k | JXL_DASSERT(length <= total_count); | 42 | 69.9k | const int count = total_count / length; | 43 | 69.9k | std::vector<int32_t> result(length, count); | 44 | 69.9k | const int rem_counts = total_count % length; | 45 | 1.30M | for (int i = 0; i < rem_counts; ++i) { | 46 | 1.23M | ++result[i]; | 47 | 1.23M | } | 48 | 69.9k | return result; | 49 | 69.9k | } |
Unexecuted instantiation: enc_cluster.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_context_map.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_lz77.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_fast_lossless.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_frame.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_modular.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_patch_dictionary.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_dot_dictionary.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_detect_dots.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_debug_image.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_quant_weights.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_coeff_order.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_heuristics.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_cache.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_group.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_splines.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_ac_strategy.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_entropy_coder.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_jpeg_data.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_encoding.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_ma.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_transform.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_rct.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: enc_palette.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: ans_common.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: coeff_order.cc:jxl::CreateFlatHistogram(int, int) dec_ans.cc:jxl::CreateFlatHistogram(int, int) Line | Count | Source | 39 | 15.0k | int length, int total_count) { | 40 | 15.0k | JXL_DASSERT(length > 0); | 41 | 15.0k | JXL_DASSERT(length <= total_count); | 42 | 15.0k | const int count = total_count / length; | 43 | 15.0k | std::vector<int32_t> result(length, count); | 44 | 15.0k | const int rem_counts = total_count % length; | 45 | 97.6k | for (int i = 0; i < rem_counts; ++i) { | 46 | 82.5k | ++result[i]; | 47 | 82.5k | } | 48 | 15.0k | return result; | 49 | 15.0k | } |
Unexecuted instantiation: dec_cache.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: dec_context_map.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: dec_external_image.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: dec_frame.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: dec_group.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: dec_modular.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: dec_noise.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: dec_patch_dictionary.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: decode.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: epf.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: icc_codec.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: dec_ma.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: encoding.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: modular_image.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: transform.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: rct.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: palette.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: quant_weights.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: stage_blending.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: stage_epf.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: stage_write.cc:jxl::CreateFlatHistogram(int, int) Unexecuted instantiation: splines.cc:jxl::CreateFlatHistogram(int, int) |
50 | | |
51 | | // An alias table implements a mapping from the [0, ANS_TAB_SIZE) range into |
52 | | // the [0, ANS_MAX_ALPHABET_SIZE) range, satisfying the following conditions: |
53 | | // - each symbol occurs as many times as specified by any valid distribution |
54 | | // of frequencies of the symbols. A valid distribution here is an array of |
55 | | // ANS_MAX_ALPHABET_SIZE that contains numbers in the range [0, ANS_TAB_SIZE], |
56 | | // and whose sum is ANS_TAB_SIZE. |
57 | | // - lookups can be done in constant time, and also return how many smaller |
58 | | // input values map into the same symbol, according to some well-defined order |
59 | | // of input values. |
60 | | // - the space used by the alias table is given by a small constant times the |
61 | | // index of the largest symbol with nonzero probability in the distribution. |
62 | | // Each of the entries in the table covers a range of `entry_size` values in the |
63 | | // [0, ANS_TAB_SIZE) range; consecutive entries represent consecutive |
64 | | // sub-ranges. In the range covered by entry `i`, the first `cutoff` values map |
65 | | // to symbol `i`, while the others map to symbol `right_value`. |
66 | | // |
67 | | // TODO(veluca): consider making the order used for computing offsets easier to |
68 | | // define - it is currently defined by the algorithm to compute the alias table. |
69 | | // Beware of breaking the implicit assumption that symbols that come after the |
70 | | // cutoff value should have an offset at least as big as the cutoff. |
71 | | |
72 | | struct AliasTable { |
73 | | struct Symbol { |
74 | | size_t value; |
75 | | size_t offset; |
76 | | size_t freq; |
77 | | }; |
78 | | |
79 | | // Working set size matters here (~64 tables x 256 entries). |
80 | | // offsets0 is always zero (beginning of [0] side among the same symbol). |
81 | | // offsets1 is an offset of (pos >= cutoff) side decremented by cutoff. |
82 | | #pragma pack(push, 1) |
83 | | struct Entry { |
84 | | uint8_t cutoff; // < kEntrySizeMinus1 when used by ANS. |
85 | | uint8_t right_value; // < alphabet size. |
86 | | uint16_t freq0; |
87 | | |
88 | | // Only used if `greater` (see Lookup) |
89 | | uint16_t offsets1; // <= ANS_TAB_SIZE |
90 | | uint16_t freq1_xor_freq0; // for branchless ternary in Lookup |
91 | | }; |
92 | | #pragma pack(pop) |
93 | | |
94 | | // Dividing `value` by `entry_size` determines `i`, the entry which is |
95 | | // responsible for the input. If the remainder is below `cutoff`, then the |
96 | | // mapped symbol is `i`; since `offsets[0]` stores the number of occurrences |
97 | | // of `i` "before" the start of this entry, the offset of the input will be |
98 | | // `offsets[0] + remainder`. If the remainder is above cutoff, the mapped |
99 | | // symbol is `right_value`; since `offsets[1]` stores the number of |
100 | | // occurrences of `right_value` "before" this entry, minus the `cutoff` value, |
101 | | // the input offset is then `remainder + offsets[1]`. |
102 | | static JXL_INLINE Symbol Lookup(const Entry* JXL_RESTRICT table, size_t value, |
103 | | size_t log_entry_size, |
104 | 115M | size_t entry_size_minus_1) { |
105 | 115M | const size_t i = value >> log_entry_size; |
106 | 115M | const size_t pos = value & entry_size_minus_1; |
107 | | |
108 | 115M | #if JXL_BYTE_ORDER_LITTLE |
109 | 115M | uint64_t entry; |
110 | 115M | memcpy(&entry, &table[i].cutoff, sizeof(entry)); |
111 | 115M | const size_t cutoff = entry & 0xFF; // = MOVZX |
112 | 115M | const size_t right_value = (entry >> 8) & 0xFF; // = MOVZX |
113 | 115M | const size_t freq0 = (entry >> 16) & 0xFFFF; |
114 | | #else |
115 | | // Generates multiple loads with complex addressing. |
116 | | const size_t cutoff = table[i].cutoff; |
117 | | const size_t right_value = table[i].right_value; |
118 | | const size_t freq0 = table[i].freq0; |
119 | | #endif |
120 | | |
121 | 115M | const bool greater = pos >= cutoff; |
122 | | |
123 | 115M | #if JXL_BYTE_ORDER_LITTLE |
124 | 115M | const uint64_t conditional = greater ? entry : 0; // = CMOV |
125 | 115M | const size_t offsets1_or_0 = (conditional >> 32) & 0xFFFF; |
126 | 115M | const size_t freq1_xor_freq0_or_0 = conditional >> 48; |
127 | | #else |
128 | | const size_t offsets1_or_0 = greater ? table[i].offsets1 : 0; |
129 | | const size_t freq1_xor_freq0_or_0 = greater ? table[i].freq1_xor_freq0 : 0; |
130 | | #endif |
131 | | |
132 | | // WARNING: moving this code may interfere with CMOV heuristics. |
133 | 115M | Symbol s; |
134 | 115M | s.value = greater ? right_value : i; |
135 | 115M | s.offset = offsets1_or_0 + pos; |
136 | 115M | s.freq = freq0 ^ freq1_xor_freq0_or_0; // = greater ? freq1 : freq0 |
137 | | // XOR avoids implementation-defined conversion from unsigned to signed. |
138 | | // Alternatives considered: BEXTR is 2 cycles on HSW, SET+shift causes |
139 | | // spills, simple ternary has a long dependency chain. |
140 | | |
141 | 115M | return s; |
142 | 115M | } |
143 | | |
144 | | static HWY_INLINE void Prefetch(const Entry* JXL_RESTRICT table, size_t value, |
145 | 33.5M | size_t log_entry_size) { |
146 | 33.5M | const size_t i = value >> log_entry_size; |
147 | 33.5M | hwy::Prefetch(table + i); |
148 | 33.5M | } |
149 | | }; |
150 | | |
151 | | // Computes an alias table for a given distribution. |
152 | | Status InitAliasTable(std::vector<int32_t> distribution, uint32_t log_range, |
153 | | size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a); |
154 | | |
155 | | } // namespace jxl |
156 | | |
157 | | #endif // LIB_JXL_ANS_COMMON_H_ |