/src/libjxl/lib/jxl/ans_common.h
Line | Count | Source |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #ifndef LIB_JXL_ANS_COMMON_H_ |
7 | | #define LIB_JXL_ANS_COMMON_H_ |
8 | | |
9 | | #include <algorithm> |
10 | | #include <cstddef> |
11 | | #include <cstdint> |
12 | | #include <cstring> |
13 | | #include <hwy/base.h> |
14 | | #include <hwy/cache_control.h> // Prefetch |
15 | | #include <vector> |
16 | | |
17 | | #include "lib/jxl/ans_params.h" |
18 | | #include "lib/jxl/base/byte_order.h" |
19 | | #include "lib/jxl/base/compiler_specific.h" |
20 | | #include "lib/jxl/base/status.h" |
21 | | |
22 | | namespace jxl { |
23 | | |
24 | | // Returns the precision (number of bits) that should be used to store |
25 | | // a histogram count such that Log2Floor(count) == logcount. |
26 | | static JXL_MAYBE_UNUSED JXL_INLINE uint32_t |
27 | 67.5k | GetPopulationCountPrecision(uint32_t logcount, uint32_t shift) { |
28 | 67.5k | int32_t r = std::min<int>( |
29 | 67.5k | logcount, static_cast<int>(shift) - |
30 | 67.5k | static_cast<int>((ANS_LOG_TAB_SIZE - logcount) >> 1)); |
31 | 67.5k | if (r < 0) return 0; |
32 | 14.5k | return r; |
33 | 67.5k | } Unexecuted instantiation: encode.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: decode.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: icc_codec.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: encoding.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: quant_weights.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_frame.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_group.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_heuristics.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_icc_codec.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_modular.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_patch_dictionary.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_quant_weights.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_splines.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_encoding.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_ma.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) dec_ans.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Line | Count | Source | 27 | 67.5k | GetPopulationCountPrecision(uint32_t logcount, uint32_t shift) { | 28 | 67.5k | int32_t r = std::min<int>( | 29 | 67.5k | logcount, static_cast<int>(shift) - | 30 | 67.5k | static_cast<int>((ANS_LOG_TAB_SIZE - logcount) >> 1)); | 31 | 67.5k | if (r < 0) return 0; | 32 | 14.5k | return r; | 33 | 67.5k | } |
Unexecuted instantiation: dec_cache.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_context_map.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_external_image.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_frame.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_group.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_modular.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_noise.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_patch_dictionary.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: epf.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: dec_ma.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: stage_blending.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: stage_epf.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: stage_write.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: splines.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_ac_strategy.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_ans.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_cache.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_cluster.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_coeff_order.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_context_map.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_debug_image.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_dot_dictionary.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_entropy_coder.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: ans_common.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: coeff_order.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) Unexecuted instantiation: enc_detect_dots.cc:jxl::GetPopulationCountPrecision(unsigned int, unsigned int) |
34 | | |
35 | | // Returns a histogram where the counts are positive, differ by at most 1, |
36 | | // and add up to total_count. The bigger counts (if any) are at the beginning |
37 | | // of the histogram. |
38 | | std::vector<int32_t> CreateFlatHistogram(int length, int total_count); |
39 | | |
40 | | // An alias table implements a mapping from the [0, ANS_TAB_SIZE) range into |
41 | | // the [0, ANS_MAX_ALPHABET_SIZE) range, satisfying the following conditions: |
42 | | // - each symbol occurs as many times as specified by any valid distribution |
43 | | // of frequencies of the symbols. A valid distribution here is an array of |
44 | | // ANS_MAX_ALPHABET_SIZE that contains numbers in the range [0, ANS_TAB_SIZE], |
45 | | // and whose sum is ANS_TAB_SIZE. |
46 | | // - lookups can be done in constant time, and also return how many smaller |
47 | | // input values map into the same symbol, according to some well-defined order |
48 | | // of input values. |
49 | | // - the space used by the alias table is given by a small constant times the |
50 | | // index of the largest symbol with nonzero probability in the distribution. |
51 | | // Each of the entries in the table covers a range of `entry_size` values in the |
52 | | // [0, ANS_TAB_SIZE) range; consecutive entries represent consecutive |
53 | | // sub-ranges. In the range covered by entry `i`, the first `cutoff` values map |
54 | | // to symbol `i`, while the others map to symbol `right_value`. |
55 | | // |
56 | | // TODO(veluca): consider making the order used for computing offsets easier to |
57 | | // define - it is currently defined by the algorithm to compute the alias table. |
58 | | // Beware of breaking the implicit assumption that symbols that come after the |
59 | | // cutoff value should have an offset at least as big as the cutoff. |
60 | | |
61 | | struct AliasTable { |
62 | | struct Symbol { |
63 | | size_t value; |
64 | | size_t offset; |
65 | | size_t freq; |
66 | | }; |
67 | | |
68 | | // Working set size matters here (~64 tables x 256 entries). |
69 | | // offsets0 is always zero (beginning of [0] side among the same symbol). |
70 | | // offsets1 is an offset of (pos >= cutoff) side decremented by cutoff. |
71 | | #pragma pack(push, 1) |
72 | | struct Entry { |
73 | | uint8_t cutoff; // < kEntrySizeMinus1 when used by ANS. |
74 | | uint8_t right_value; // < alphabet size. |
75 | | uint16_t freq0; |
76 | | |
77 | | // Only used if `greater` (see Lookup) |
78 | | uint16_t offsets1; // <= ANS_TAB_SIZE |
79 | | uint16_t freq1_xor_freq0; // for branchless ternary in Lookup |
80 | | }; |
81 | | #pragma pack(pop) |
82 | | |
83 | | // Dividing `value` by `entry_size` determines `i`, the entry which is |
84 | | // responsible for the input. If the remainder is below `cutoff`, then the |
85 | | // mapped symbol is `i`; since `offsets[0]` stores the number of occurrences |
86 | | // of `i` "before" the start of this entry, the offset of the input will be |
87 | | // `offsets[0] + remainder`. If the remainder is above cutoff, the mapped |
88 | | // symbol is `right_value`; since `offsets[1]` stores the number of |
89 | | // occurrences of `right_value` "before" this entry, minus the `cutoff` value, |
90 | | // the input offset is then `remainder + offsets[1]`. |
91 | | static JXL_INLINE Symbol Lookup(const Entry* JXL_RESTRICT table, size_t value, |
92 | | size_t log_entry_size, |
93 | 40.2M | size_t entry_size_minus_1) { |
94 | 40.2M | const size_t i = value >> log_entry_size; |
95 | 40.2M | const size_t pos = value & entry_size_minus_1; |
96 | | |
97 | 40.2M | #if JXL_BYTE_ORDER_LITTLE |
98 | 40.2M | uint64_t entry; |
99 | 40.2M | memcpy(&entry, &table[i].cutoff, sizeof(entry)); |
100 | 40.2M | const size_t cutoff = entry & 0xFF; // = MOVZX |
101 | 40.2M | const size_t right_value = (entry >> 8) & 0xFF; // = MOVZX |
102 | 40.2M | const size_t freq0 = (entry >> 16) & 0xFFFF; |
103 | | #else |
104 | | // Generates multiple loads with complex addressing. |
105 | | const size_t cutoff = table[i].cutoff; |
106 | | const size_t right_value = table[i].right_value; |
107 | | const size_t freq0 = table[i].freq0; |
108 | | #endif |
109 | | |
110 | 40.2M | const bool greater = pos >= cutoff; |
111 | | |
112 | 40.2M | #if JXL_BYTE_ORDER_LITTLE |
113 | 40.2M | const uint64_t conditional = greater ? entry : 0; // = CMOV |
114 | 40.2M | const size_t offsets1_or_0 = (conditional >> 32) & 0xFFFF; |
115 | 40.2M | const size_t freq1_xor_freq0_or_0 = conditional >> 48; |
116 | | #else |
117 | | const size_t offsets1_or_0 = greater ? table[i].offsets1 : 0; |
118 | | const size_t freq1_xor_freq0_or_0 = greater ? table[i].freq1_xor_freq0 : 0; |
119 | | #endif |
120 | | |
121 | | // WARNING: moving this code may interfere with CMOV heuristics. |
122 | 40.2M | Symbol s; |
123 | 40.2M | s.value = greater ? right_value : i; |
124 | 40.2M | s.offset = offsets1_or_0 + pos; |
125 | 40.2M | s.freq = freq0 ^ freq1_xor_freq0_or_0; // = greater ? freq1 : freq0 |
126 | | // XOR avoids implementation-defined conversion from unsigned to signed. |
127 | | // Alternatives considered: BEXTR is 2 cycles on HSW, SET+shift causes |
128 | | // spills, simple ternary has a long dependency chain. |
129 | | |
130 | 40.2M | return s; |
131 | 40.2M | } |
132 | | |
133 | | static HWY_INLINE void Prefetch(const Entry* JXL_RESTRICT table, size_t value, |
134 | 40.8M | size_t log_entry_size) { |
135 | 40.8M | const size_t i = value >> log_entry_size; |
136 | 40.8M | hwy::Prefetch(table + i); |
137 | 40.8M | } |
138 | | }; |
139 | | |
140 | | // Computes an alias table for a given distribution. |
141 | | Status InitAliasTable(std::vector<int32_t> distribution, uint32_t log_range, |
142 | | size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a); |
143 | | |
144 | | } // namespace jxl |
145 | | |
146 | | #endif // LIB_JXL_ANS_COMMON_H_ |