/src/guetzli/guetzli/entropy_encode.cc
Line | Count | Source |
1 | | /* |
2 | | * Copyright 2016 Google Inc. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | // Entropy encoding (Huffman) utilities. |
18 | | |
19 | | #include "guetzli/entropy_encode.h" |
20 | | |
21 | | #include <assert.h> |
22 | | #include <algorithm> |
23 | | |
24 | | namespace guetzli { |
25 | | |
26 | 2.06M | bool SetDepth(int p0, HuffmanTree *pool, uint8_t *depth, int max_depth) { |
27 | 2.06M | int stack[17]; |
28 | 2.06M | int level = 0; |
29 | 2.06M | int p = p0; |
30 | 2.06M | assert(max_depth <= 16); |
31 | 2.06M | stack[0] = -1; |
32 | 47.7M | while (true) { |
33 | 47.7M | if (pool[p].index_left_ >= 0) { |
34 | 22.8M | level++; |
35 | 22.8M | if (level > max_depth) return false; |
36 | 22.8M | stack[level] = pool[p].index_right_or_value_; |
37 | 22.8M | p = pool[p].index_left_; |
38 | 22.8M | continue; |
39 | 24.9M | } else { |
40 | 24.9M | depth[pool[p].index_right_or_value_] = static_cast<uint8_t>(level); |
41 | 24.9M | } |
42 | 49.8M | while (level >= 0 && stack[level] == -1) level--; |
43 | 24.9M | if (level < 0) return true; |
44 | 22.8M | p = stack[level]; |
45 | 22.8M | stack[level] = -1; |
46 | 22.8M | } |
47 | 2.06M | } |
48 | | |
49 | | // Sort the root nodes, least popular first. |
50 | | static inline bool SortHuffmanTree(const HuffmanTree& v0, |
51 | 84.6M | const HuffmanTree& v1) { |
52 | 84.6M | if (v0.total_count_ != v1.total_count_) { |
53 | 64.8M | return v0.total_count_ < v1.total_count_; |
54 | 64.8M | } |
55 | 19.8M | return v0.index_right_or_value_ > v1.index_right_or_value_; |
56 | 84.6M | } |
57 | | |
58 | | // This function will create a Huffman tree. |
59 | | // |
60 | | // The catch here is that the tree cannot be arbitrarily deep. |
61 | | // Brotli specifies a maximum depth of 15 bits for "code trees" |
62 | | // and 7 bits for "code length code trees." |
63 | | // |
64 | | // count_limit is the value that is to be faked as the minimum value |
65 | | // and this minimum value is raised until the tree matches the |
66 | | // maximum length requirement. |
67 | | // |
68 | | // This algorithm is not of excellent performance for very long data blocks, |
69 | | // especially when population counts are longer than 2**tree_limit, but |
70 | | // we are not planning to use this with extremely long blocks. |
71 | | // |
72 | | // See http://en.wikipedia.org/wiki/Huffman_coding |
73 | | void CreateHuffmanTree(const uint32_t *data, |
74 | | const size_t length, |
75 | | const int tree_limit, |
76 | | HuffmanTree* tree, |
77 | 2.08M | uint8_t *depth) { |
78 | | // For block sizes below 64 kB, we never need to do a second iteration |
79 | | // of this loop. Probably all of our block sizes will be smaller than |
80 | | // that, so this loop is mostly of academic interest. If we actually |
81 | | // would need this, we would be better off with the Katajainen algorithm. |
82 | 2.08M | for (uint32_t count_limit = 1; ; count_limit *= 2) { |
83 | 2.08M | size_t n = 0; |
84 | 536M | for (size_t i = length; i != 0;) { |
85 | 534M | --i; |
86 | 534M | if (data[i]) { |
87 | 24.9M | const uint32_t count = std::max<uint32_t>(data[i], count_limit); |
88 | 24.9M | tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i)); |
89 | 24.9M | } |
90 | 534M | } |
91 | | |
92 | 2.08M | if (n == 1) { |
93 | 11.4k | depth[tree[0].index_right_or_value_] = 1; // Only one element. |
94 | 11.4k | break; |
95 | 11.4k | } |
96 | | |
97 | 2.06M | std::sort(tree, tree + n, SortHuffmanTree); |
98 | | |
99 | | // The nodes are: |
100 | | // [0, n): the sorted leaf nodes that we start with. |
101 | | // [n]: we add a sentinel here. |
102 | | // [n + 1, 2n): new parent nodes are added here, starting from |
103 | | // (n+1). These are naturally in ascending order. |
104 | | // [2n]: we add a sentinel at the end as well. |
105 | | // There will be (2n+1) elements at the end. |
106 | 2.06M | const HuffmanTree sentinel(~static_cast<uint32_t>(0), -1, -1); |
107 | 2.06M | tree[n] = sentinel; |
108 | 2.06M | tree[n + 1] = sentinel; |
109 | | |
110 | 2.06M | size_t i = 0; // Points to the next leaf node. |
111 | 2.06M | size_t j = n + 1; // Points to the next non-leaf node. |
112 | 24.9M | for (size_t k = n - 1; k != 0; --k) { |
113 | 22.8M | size_t left, right; |
114 | 22.8M | if (tree[i].total_count_ <= tree[j].total_count_) { |
115 | 12.2M | left = i; |
116 | 12.2M | ++i; |
117 | 12.2M | } else { |
118 | 10.5M | left = j; |
119 | 10.5M | ++j; |
120 | 10.5M | } |
121 | 22.8M | if (tree[i].total_count_ <= tree[j].total_count_) { |
122 | 12.6M | right = i; |
123 | 12.6M | ++i; |
124 | 12.6M | } else { |
125 | 10.2M | right = j; |
126 | 10.2M | ++j; |
127 | 10.2M | } |
128 | | |
129 | | // The sentinel node becomes the parent node. |
130 | 22.8M | size_t j_end = 2 * n - k; |
131 | 22.8M | tree[j_end].total_count_ = |
132 | 22.8M | tree[left].total_count_ + tree[right].total_count_; |
133 | 22.8M | tree[j_end].index_left_ = static_cast<int16_t>(left); |
134 | 22.8M | tree[j_end].index_right_or_value_ = static_cast<int16_t>(right); |
135 | | |
136 | | // Add back the last sentinel node. |
137 | 22.8M | tree[j_end + 1] = sentinel; |
138 | 22.8M | } |
139 | 2.06M | if (SetDepth(static_cast<int>(2 * n - 1), &tree[0], depth, tree_limit)) { |
140 | | /* We need to pack the Huffman tree in tree_limit bits. If this was not |
141 | | successful, add fake entities to the lowest values and retry. */ |
142 | 2.06M | break; |
143 | 2.06M | } |
144 | 2.06M | } |
145 | 2.08M | } |
146 | | |
147 | | } // namespace guetzli |