/src/libwebp/src/utils/bit_reader_inl_utils.h
Line | Count | Source |
1 | | // Copyright 2014 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style license |
4 | | // that can be found in the COPYING file in the root of the source |
5 | | // tree. An additional intellectual property rights grant can be found |
6 | | // in the file PATENTS. All contributing project authors may |
7 | | // be found in the AUTHORS file in the root of the source tree. |
8 | | // ----------------------------------------------------------------------------- |
9 | | // |
10 | | // Specific inlined methods for boolean decoder [VP8GetBit() ...] |
11 | | // This file should be included by the .c sources that actually need to call |
12 | | // these methods. |
13 | | // |
14 | | // Author: Skal (pascal.massimino@gmail.com) |
15 | | |
16 | | #ifndef WEBP_UTILS_BIT_READER_INL_UTILS_H_ |
17 | | #define WEBP_UTILS_BIT_READER_INL_UTILS_H_ |
18 | | |
19 | | #ifdef HAVE_CONFIG_H |
20 | | #include "src/webp/config.h" |
21 | | #endif |
22 | | |
23 | | #include <assert.h> |
24 | | #include <string.h> // for memcpy |
25 | | |
26 | | #include "src/dsp/cpu.h" |
27 | | #include "src/dsp/dsp.h" |
28 | | #include "src/utils/bit_reader_utils.h" |
29 | | #include "src/utils/bounds_safety.h" |
30 | | #include "src/utils/endian_inl_utils.h" |
31 | | #include "src/utils/utils.h" |
32 | | #include "src/webp/types.h" |
33 | | |
34 | | WEBP_ASSUME_UNSAFE_INDEXABLE_ABI |
35 | | |
36 | | #ifdef __cplusplus |
37 | | extern "C" { |
38 | | #endif |
39 | | |
40 | | //------------------------------------------------------------------------------ |
41 | | // Derived type lbit_t = natural type for memory I/O: the narrowest of uint8_t / uint16_t / uint32_t / uint64_t that can hold BITS bits. |
42 | | |
43 | | #if (BITS > 32) |
44 | | typedef uint64_t lbit_t; |
45 | | #elif (BITS > 16) |
46 | | typedef uint32_t lbit_t; |
47 | | #elif (BITS > 8) |
48 | | typedef uint16_t lbit_t; |
49 | | #else |
50 | | typedef uint8_t lbit_t; |
51 | | #endif |
52 | | |
53 | | extern const uint8_t kVP8Log2Range[128]; |
54 | | extern const uint8_t kVP8NewRange[128]; |
55 | | |
56 | | // Special case for the tail byte-reading: out-of-line fallback that VP8LoadNewBytes() calls when br->buf is not below br->buf_max. |
57 | | void VP8LoadFinalBytes(VP8BitReader* const br); |
58 | | |
59 | | //------------------------------------------------------------------------------ |
60 | | // Inlined critical functions |
61 | | |
62 | | // Makes sure br->value has at least BITS bits worth of data. While br->buf < br->buf_max, loads BITS bits from br->buf (advancing it by BITS >> 3 bytes), shifts them into the low end of br->value and adds BITS to br->bits; otherwise defers to VP8LoadFinalBytes(). |
63 | | static WEBP_UBSAN_IGNORE_UNDEF WEBP_INLINE void VP8LoadNewBytes( |
64 | 19.7M | VP8BitReader* WEBP_RESTRICT const br) { |
65 | 19.7M | assert(br != NULL && br->buf != NULL); |
66 | | // Read 'BITS' bits at a time if possible. |
67 | 19.7M | if (br->buf < br->buf_max) { |
68 | | // convert memory type (lbit_t) to register type (bit_t), with some zeroing! |
69 | 6.20M | bit_t bits; |
70 | | #if defined(WEBP_USE_MIPS32) |
71 | | // This is needed because of un-aligned read. |
72 | | lbit_t in_bits; |
73 | | lbit_t* p_buf = (lbit_t*)br->buf; |
74 | | __asm__ volatile( |
75 | | ".set push \n\t" |
76 | | ".set at \n\t" |
77 | | ".set macro \n\t" |
78 | | "ulw %[in_bits], 0(%[p_buf]) \n\t" |
79 | | ".set pop \n\t" |
80 | | : [in_bits] "=r"(in_bits) |
81 | | : [p_buf] "r"(p_buf) |
82 | | : "memory", "at"); |
83 | | #else |
84 | 6.20M | lbit_t in_bits; |
85 | 6.20M | WEBP_UNSAFE_MEMCPY(&in_bits, br->buf, sizeof(in_bits)); |
86 | 6.20M | #endif |
87 | 6.20M | br->buf += BITS >> 3; |
88 | 6.20M | WEBP_SELF_ASSIGN(br->buf_end); |
89 | 6.20M | #if !defined(WORDS_BIGENDIAN) |
90 | 6.20M | #if (BITS > 32) |
91 | 6.20M | bits = BSwap64(in_bits); |
92 | 6.20M | bits >>= 64 - BITS; |
93 | | #elif (BITS >= 24) |
94 | | bits = BSwap32(in_bits); |
95 | | bits >>= (32 - BITS); |
96 | | #elif (BITS == 16) |
97 | | bits = BSwap16(in_bits); |
98 | | #else // BITS == 8 |
99 | | bits = (bit_t)in_bits; |
100 | | #endif // BITS > 32 |
101 | | #else // WORDS_BIGENDIAN |
102 | | bits = (bit_t)in_bits; |
103 | | if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS); |
104 | | #endif |
105 | 6.20M | br->value = bits | (br->value << BITS); |
106 | 6.20M | br->bits += BITS; |
107 | 13.5M | } else { |
108 | 13.5M | VP8LoadFinalBytes(br); // no need to be inlined |
109 | 13.5M | } |
110 | 19.7M | } vp8_dec.c:VP8LoadNewBytes Line | Count | Source | 64 | 11.4M | VP8BitReader* WEBP_RESTRICT const br) { | 65 | 11.4M | assert(br != NULL && br->buf != NULL); | 66 | | // Read 'BITS' bits at a time if possible. | 67 | 11.4M | if (br->buf < br->buf_max) { | 68 | | // convert memory type to register type (with some zero'ing!) | 69 | 4.95M | bit_t bits; | 70 | | #if defined(WEBP_USE_MIPS32) | 71 | | // This is needed because of un-aligned read. | 72 | | lbit_t in_bits; | 73 | | lbit_t* p_buf = (lbit_t*)br->buf; | 74 | | __asm__ volatile( | 75 | | ".set push \n\t" | 76 | | ".set at \n\t" | 77 | | ".set macro \n\t" | 78 | | "ulw %[in_bits], 0(%[p_buf]) \n\t" | 79 | | ".set pop \n\t" | 80 | | : [in_bits] "=r"(in_bits) | 81 | | : [p_buf] "r"(p_buf) | 82 | | : "memory", "at"); | 83 | | #else | 84 | 4.95M | lbit_t in_bits; | 85 | 4.95M | WEBP_UNSAFE_MEMCPY(&in_bits, br->buf, sizeof(in_bits)); | 86 | 4.95M | #endif | 87 | 4.95M | br->buf += BITS >> 3; | 88 | 4.95M | WEBP_SELF_ASSIGN(br->buf_end); | 89 | 4.95M | #if !defined(WORDS_BIGENDIAN) | 90 | 4.95M | #if (BITS > 32) | 91 | 4.95M | bits = BSwap64(in_bits); | 92 | 4.95M | bits >>= 64 - BITS; | 93 | | #elif (BITS >= 24) | 94 | | bits = BSwap32(in_bits); | 95 | | bits >>= (32 - BITS); | 96 | | #elif (BITS == 16) | 97 | | bits = BSwap16(in_bits); | 98 | | #else // BITS == 8 | 99 | | bits = (bit_t)in_bits; | 100 | | #endif // BITS > 32 | 101 | | #else // WORDS_BIGENDIAN | 102 | | bits = (bit_t)in_bits; | 103 | | if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS); | 104 | | #endif | 105 | 4.95M | br->value = bits | (br->value << BITS); | 106 | 4.95M | br->bits += BITS; | 107 | 6.50M | } else { | 108 | 6.50M | VP8LoadFinalBytes(br); // no need to be inlined | 109 | 6.50M | } | 110 | 11.4M | } |
bit_reader_utils.c:VP8LoadNewBytes Line | Count | Source | 64 | 3.56M | VP8BitReader* WEBP_RESTRICT const br) { | 65 | 3.56M | assert(br != NULL && br->buf != NULL); | 66 | | // Read 'BITS' bits at a time if possible. | 67 | 3.56M | if (br->buf < br->buf_max) { | 68 | | // convert memory type to register type (with some zero'ing!) | 69 | 788k | bit_t bits; | 70 | | #if defined(WEBP_USE_MIPS32) | 71 | | // This is needed because of un-aligned read. | 72 | | lbit_t in_bits; | 73 | | lbit_t* p_buf = (lbit_t*)br->buf; | 74 | | __asm__ volatile( | 75 | | ".set push \n\t" | 76 | | ".set at \n\t" | 77 | | ".set macro \n\t" | 78 | | "ulw %[in_bits], 0(%[p_buf]) \n\t" | 79 | | ".set pop \n\t" | 80 | | : [in_bits] "=r"(in_bits) | 81 | | : [p_buf] "r"(p_buf) | 82 | | : "memory", "at"); | 83 | | #else | 84 | 788k | lbit_t in_bits; | 85 | 788k | WEBP_UNSAFE_MEMCPY(&in_bits, br->buf, sizeof(in_bits)); | 86 | 788k | #endif | 87 | 788k | br->buf += BITS >> 3; | 88 | 788k | WEBP_SELF_ASSIGN(br->buf_end); | 89 | 788k | #if !defined(WORDS_BIGENDIAN) | 90 | 788k | #if (BITS > 32) | 91 | 788k | bits = BSwap64(in_bits); | 92 | 788k | bits >>= 64 - BITS; | 93 | | #elif (BITS >= 24) | 94 | | bits = BSwap32(in_bits); | 95 | | bits >>= (32 - BITS); | 96 | | #elif (BITS == 16) | 97 | | bits = BSwap16(in_bits); | 98 | | #else // BITS == 8 | 99 | | bits = (bit_t)in_bits; | 100 | | #endif // BITS > 32 | 101 | | #else // WORDS_BIGENDIAN | 102 | | bits = (bit_t)in_bits; | 103 | | if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS); | 104 | | #endif | 105 | 788k | br->value = bits | (br->value << BITS); | 106 | 788k | br->bits += BITS; | 107 | 2.77M | } else { | 108 | 2.77M | VP8LoadFinalBytes(br); // no need to be inlined | 109 | 2.77M | } | 110 | 3.56M | } |
tree_dec.c:VP8LoadNewBytes Line | Count | Source | 64 | 4.72M | VP8BitReader* WEBP_RESTRICT const br) { | 65 | 4.72M | assert(br != NULL && br->buf != NULL); | 66 | | // Read 'BITS' bits at a time if possible. | 67 | 4.72M | if (br->buf < br->buf_max) { | 68 | | // convert memory type to register type (with some zero'ing!) | 69 | 460k | bit_t bits; | 70 | | #if defined(WEBP_USE_MIPS32) | 71 | | // This is needed because of un-aligned read. | 72 | | lbit_t in_bits; | 73 | | lbit_t* p_buf = (lbit_t*)br->buf; | 74 | | __asm__ volatile( | 75 | | ".set push \n\t" | 76 | | ".set at \n\t" | 77 | | ".set macro \n\t" | 78 | | "ulw %[in_bits], 0(%[p_buf]) \n\t" | 79 | | ".set pop \n\t" | 80 | | : [in_bits] "=r"(in_bits) | 81 | | : [p_buf] "r"(p_buf) | 82 | | : "memory", "at"); | 83 | | #else | 84 | 460k | lbit_t in_bits; | 85 | 460k | WEBP_UNSAFE_MEMCPY(&in_bits, br->buf, sizeof(in_bits)); | 86 | 460k | #endif | 87 | 460k | br->buf += BITS >> 3; | 88 | 460k | WEBP_SELF_ASSIGN(br->buf_end); | 89 | 460k | #if !defined(WORDS_BIGENDIAN) | 90 | 460k | #if (BITS > 32) | 91 | 460k | bits = BSwap64(in_bits); | 92 | 460k | bits >>= 64 - BITS; | 93 | | #elif (BITS >= 24) | 94 | | bits = BSwap32(in_bits); | 95 | | bits >>= (32 - BITS); | 96 | | #elif (BITS == 16) | 97 | | bits = BSwap16(in_bits); | 98 | | #else // BITS == 8 | 99 | | bits = (bit_t)in_bits; | 100 | | #endif // BITS > 32 | 101 | | #else // WORDS_BIGENDIAN | 102 | | bits = (bit_t)in_bits; | 103 | | if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS); | 104 | | #endif | 105 | 460k | br->value = bits | (br->value << BITS); | 106 | 460k | br->bits += BITS; | 107 | 4.26M | } else { | 108 | 4.26M | VP8LoadFinalBytes(br); // no need to be inlined | 109 | 4.26M | } | 110 | 4.72M | } |
|
111 | | |
112 | | // Reads one bit using proba 'prob' and returns it (0 or 1): the bit is 1 when (br->value >> br->bits) > split, with split = (range * prob) >> 8. Refills through VP8LoadNewBytes() when br->bits < 0, then updates br->value, br->range and br->bits. 'label' is not referenced directly in this body (presumably consumed by BT_TRACK -- TODO confirm). Speed-critical function! |
113 | | static WEBP_INLINE int VP8GetBit(VP8BitReader* WEBP_RESTRICT const br, int prob, |
114 | 428M | const char label[]) { |
115 | | // Don't move this declaration! It makes a big speed difference to store |
116 | | // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't |
117 | | // alter br->range value. |
118 | 428M | range_t range = br->range; |
119 | 428M | if (br->bits < 0) { |
120 | 18.1M | VP8LoadNewBytes(br); |
121 | 18.1M | } |
122 | 428M | { |
123 | 428M | const int pos = br->bits; |
124 | 428M | const range_t split = (range * prob) >> 8; |
125 | 428M | const range_t value = (range_t)(br->value >> pos); |
126 | 428M | const int bit = (value > split); |
127 | 428M | if (bit) { |
128 | 186M | range -= split; |
129 | 186M | br->value -= (bit_t)(split + 1) << pos; |
130 | 241M | } else { |
131 | 241M | range = split + 1; |
132 | 241M | } |
133 | 428M | { |
134 | 428M | const int shift = 7 ^ BitsLog2Floor(range); |
135 | 428M | range <<= shift; |
136 | 428M | br->bits -= shift; |
137 | 428M | } |
138 | 428M | br->range = range - 1; |
139 | 428M | BT_TRACK(br); |
140 | 428M | return bit; |
141 | 428M | } |
142 | 428M | } Line | Count | Source | 114 | 273M | const char label[]) { | 115 | | // Don't move this declaration! It makes a big speed difference to store | 116 | | // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't | 117 | | // alter br->range value. | 118 | 273M | range_t range = br->range; | 119 | 273M | if (br->bits < 0) { | 120 | 10.1M | VP8LoadNewBytes(br); | 121 | 10.1M | } | 122 | 273M | { | 123 | 273M | const int pos = br->bits; | 124 | 273M | const range_t split = (range * prob) >> 8; | 125 | 273M | const range_t value = (range_t)(br->value >> pos); | 126 | 273M | const int bit = (value > split); | 127 | 273M | if (bit) { | 128 | 155M | range -= split; | 129 | 155M | br->value -= (bit_t)(split + 1) << pos; | 130 | 155M | } else { | 131 | 118M | range = split + 1; | 132 | 118M | } | 133 | 273M | { | 134 | 273M | const int shift = 7 ^ BitsLog2Floor(range); | 135 | 273M | range <<= shift; | 136 | 273M | br->bits -= shift; | 137 | 273M | } | 138 | 273M | br->range = range - 1; | 139 | 273M | BT_TRACK(br); | 140 | 273M | return bit; | 141 | 273M | } | 142 | 273M | } |
bit_reader_utils.c:VP8GetBit Line | Count | Source | 114 | 27.1M | const char label[]) { | 115 | | // Don't move this declaration! It makes a big speed difference to store | 116 | | // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't | 117 | | // alter br->range value. | 118 | 27.1M | range_t range = br->range; | 119 | 27.1M | if (br->bits < 0) { | 120 | 3.26M | VP8LoadNewBytes(br); | 121 | 3.26M | } | 122 | 27.1M | { | 123 | 27.1M | const int pos = br->bits; | 124 | 27.1M | const range_t split = (range * prob) >> 8; | 125 | 27.1M | const range_t value = (range_t)(br->value >> pos); | 126 | 27.1M | const int bit = (value > split); | 127 | 27.1M | if (bit) { | 128 | 11.7M | range -= split; | 129 | 11.7M | br->value -= (bit_t)(split + 1) << pos; | 130 | 15.3M | } else { | 131 | 15.3M | range = split + 1; | 132 | 15.3M | } | 133 | 27.1M | { | 134 | 27.1M | const int shift = 7 ^ BitsLog2Floor(range); | 135 | 27.1M | range <<= shift; | 136 | 27.1M | br->bits -= shift; | 137 | 27.1M | } | 138 | 27.1M | br->range = range - 1; | 139 | 27.1M | BT_TRACK(br); | 140 | 27.1M | return bit; | 141 | 27.1M | } | 142 | 27.1M | } |
Line | Count | Source | 114 | 127M | const char label[]) { | 115 | | // Don't move this declaration! It makes a big speed difference to store | 116 | | // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't | 117 | | // alter br->range value. | 118 | 127M | range_t range = br->range; | 119 | 127M | if (br->bits < 0) { | 120 | 4.72M | VP8LoadNewBytes(br); | 121 | 4.72M | } | 122 | 127M | { | 123 | 127M | const int pos = br->bits; | 124 | 127M | const range_t split = (range * prob) >> 8; | 125 | 127M | const range_t value = (range_t)(br->value >> pos); | 126 | 127M | const int bit = (value > split); | 127 | 127M | if (bit) { | 128 | 19.4M | range -= split; | 129 | 19.4M | br->value -= (bit_t)(split + 1) << pos; | 130 | 107M | } else { | 131 | 107M | range = split + 1; | 132 | 107M | } | 133 | 127M | { | 134 | 127M | const int shift = 7 ^ BitsLog2Floor(range); | 135 | 127M | range <<= shift; | 136 | 127M | br->bits -= shift; | 137 | 127M | } | 138 | 127M | br->range = range - 1; | 139 | 127M | BT_TRACK(br); | 140 | 127M | return bit; | 141 | 127M | } | 142 | 127M | } |
|
143 | | |
144 | | // Simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here). Returns 'v' when mask is 0 and -v when mask is -1, computed without a branch as (v ^ mask) - mask. |
145 | | static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE int VP8GetSigned( |
146 | 44.5M | VP8BitReader* WEBP_RESTRICT const br, int v, const char label[]) { |
147 | 44.5M | if (br->bits < 0) { |
148 | 989k | VP8LoadNewBytes(br); |
149 | 989k | } |
150 | 44.5M | { |
151 | 44.5M | const int pos = br->bits; |
152 | 44.5M | const range_t split = br->range >> 1; |
153 | 44.5M | const range_t value = (range_t)(br->value >> pos); |
154 | 44.5M | const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0 |
155 | 44.5M | br->bits -= 1; |
156 | 44.5M | br->range += (range_t)mask; |
157 | 44.5M | br->range |= 1; |
158 | 44.5M | br->value -= (bit_t)((split + 1) & (uint32_t)mask) << pos; |
159 | 44.5M | BT_TRACK(br); |
160 | 44.5M | return (v ^ mask) - mask; |
161 | 44.5M | } |
162 | 44.5M | } Line | Count | Source | 146 | 44.5M | VP8BitReader* WEBP_RESTRICT const br, int v, const char label[]) { | 147 | 44.5M | if (br->bits < 0) { | 148 | 989k | VP8LoadNewBytes(br); | 149 | 989k | } | 150 | 44.5M | { | 151 | 44.5M | const int pos = br->bits; | 152 | 44.5M | const range_t split = br->range >> 1; | 153 | 44.5M | const range_t value = (range_t)(br->value >> pos); | 154 | 44.5M | const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0 | 155 | 44.5M | br->bits -= 1; | 156 | 44.5M | br->range += (range_t)mask; | 157 | 44.5M | br->range |= 1; | 158 | 44.5M | br->value -= (bit_t)((split + 1) & (uint32_t)mask) << pos; | 159 | 44.5M | BT_TRACK(br); | 160 | 44.5M | return (v ^ mask) - mask; | 161 | 44.5M | } | 162 | 44.5M | } |
Unexecuted instantiation: bit_reader_utils.c:VP8GetSigned Unexecuted instantiation: tree_dec.c:VP8GetSigned |
163 | | |
164 | | static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* WEBP_RESTRICT const br, |
165 | 24.7M | int prob, const char label[]) { |
166 | | // Variant of VP8GetBit() that renormalizes through the kVP8Log2Range[] / kVP8NewRange[] tables (only when range <= 0x7e) instead of BitsLog2Floor(). Don't move this declaration! It makes a big speed difference to store |
167 | | // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't |
168 | | // alter br->range value. |
169 | 24.7M | range_t range = br->range; |
170 | 24.7M | if (br->bits < 0) { |
171 | 312k | VP8LoadNewBytes(br); |
172 | 312k | } |
173 | 24.7M | { |
174 | 24.7M | const int pos = br->bits; |
175 | 24.7M | const range_t split = (range * prob) >> 8; |
176 | 24.7M | const range_t value = (range_t)(br->value >> pos); |
177 | 24.7M | int bit; // Don't use 'const int bit = (value > split);', it's slower. |
178 | 24.7M | if (value > split) { |
179 | 18.0M | range -= split + 1; |
180 | 18.0M | br->value -= (bit_t)(split + 1) << pos; |
181 | 18.0M | bit = 1; |
182 | 18.0M | } else { |
183 | 6.71M | range = split; |
184 | 6.71M | bit = 0; |
185 | 6.71M | } |
186 | 24.7M | if (range <= (range_t)0x7e) { |
187 | 10.9M | const int shift = kVP8Log2Range[range]; |
188 | 10.9M | range = kVP8NewRange[range]; |
189 | 10.9M | br->bits -= shift; |
190 | 10.9M | } |
191 | 24.7M | br->range = range; |
192 | 24.7M | BT_TRACK(br); |
193 | 24.7M | return bit; |
194 | 24.7M | } |
195 | 24.7M | } Line | Count | Source | 165 | 24.7M | int prob, const char label[]) { | 166 | | // Don't move this declaration! It makes a big speed difference to store | 167 | | // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't | 168 | | // alter br->range value. | 169 | 24.7M | range_t range = br->range; | 170 | 24.7M | if (br->bits < 0) { | 171 | 312k | VP8LoadNewBytes(br); | 172 | 312k | } | 173 | 24.7M | { | 174 | 24.7M | const int pos = br->bits; | 175 | 24.7M | const range_t split = (range * prob) >> 8; | 176 | 24.7M | const range_t value = (range_t)(br->value >> pos); | 177 | 24.7M | int bit; // Don't use 'const int bit = (value > split);", it's slower. | 178 | 24.7M | if (value > split) { | 179 | 18.0M | range -= split + 1; | 180 | 18.0M | br->value -= (bit_t)(split + 1) << pos; | 181 | 18.0M | bit = 1; | 182 | 18.0M | } else { | 183 | 6.71M | range = split; | 184 | 6.71M | bit = 0; | 185 | 6.71M | } | 186 | 24.7M | if (range <= (range_t)0x7e) { | 187 | 10.9M | const int shift = kVP8Log2Range[range]; | 188 | 10.9M | range = kVP8NewRange[range]; | 189 | 10.9M | br->bits -= shift; | 190 | 10.9M | } | 191 | 24.7M | br->range = range; | 192 | 24.7M | BT_TRACK(br); | 193 | 24.7M | return bit; | 194 | 24.7M | } | 195 | 24.7M | } |
Unexecuted instantiation: bit_reader_utils.c:VP8GetBitAlt Unexecuted instantiation: tree_dec.c:VP8GetBitAlt |
196 | | |
197 | | #ifdef __cplusplus |
198 | | } // extern "C" |
199 | | #endif |
200 | | |
201 | | #endif // WEBP_UTILS_BIT_READER_INL_UTILS_H_ |