1#!/usr/bin/env python
2r"""
3This package defines classes that simplify bit-wise creation, manipulation and
4interpretation of data.
5
6Classes:
7
8Bits -- An immutable container for binary data.
9BitArray -- A mutable container for binary data.
10ConstBitStream -- An immutable container with streaming methods.
11BitStream -- A mutable container with streaming methods.
12Array -- An efficient list-like container where each item has a fixed-length binary format.
13Dtype -- Encapsulate the data types used in the other classes.
14
15Functions:
16
17pack -- Create a BitStream from a format string.
18
19Data:
20
21options -- Module-wide options.
22
23Exceptions:
24
25Error -- Module exception base class.
26CreationError -- Error during creation.
27InterpretError -- Inappropriate interpretation of binary data.
28ByteAlignError -- Whole byte position or length needed.
29ReadError -- Reading or peeking past the end of a bitstring.
30
31https://github.com/scott-griffiths/bitstring
32"""
33
34__licence__ = """
35The MIT License
36
37Copyright (c) 2006 Scott Griffiths (dr.scottgriffiths@gmail.com)
38
39Permission is hereby granted, free of charge, to any person obtaining a copy
40of this software and associated documentation files (the "Software"), to deal
41in the Software without restriction, including without limitation the rights
42to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
43copies of the Software, and to permit persons to whom the Software is
44furnished to do so, subject to the following conditions:
45
46The above copyright notice and this permission notice shall be included in
47all copies or substantial portions of the Software.
48
49THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
54OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
55THE SOFTWARE.
56"""
57
58__version__ = "4.2.1"
59
60__author__ = "Scott Griffiths"
61
62import sys
63
64from .bits import Bits
65from .bitstring_options import Options
66from .bitarray_ import BitArray
67from .bitstream import ConstBitStream, BitStream
68from .methods import pack
69from .array_ import Array
70from .exceptions import Error, ReadError, InterpretError, ByteAlignError, CreationError
71from .dtypes import DtypeDefinition, dtype_register, Dtype
72import types
73from typing import List, Tuple, Literal
74from .mxfp import decompress_luts as mxfp_decompress_luts
75from .fp8 import decompress_luts as binary8_decompress_luts
76
# Decompress the LUTs for the exotic floating point formats.
# Both calls are plain module-level statements, so they run once when this package is imported.
mxfp_decompress_luts()
binary8_decompress_luts()

# The Options class returns a singleton.
options = Options()

# These get defined properly by the module magic below. This just stops mypy complaining about them.
# The module class installed further down defines properties with these same names that
# forward to the options object.
bytealigned = lsb0 = None
86
87
88# An opaque way of adding module level properties. Taken from https://peps.python.org/pep-0549/
89# This is now deprecated. Use the options object directly instead.
class _MyModuleType(types.ModuleType):
    """Module type that exposes ``lsb0`` and ``bytealigned`` as module-level properties.

    Each property reads from, and writes to, the attribute of the same name on the
    module-wide ``options`` object.
    """

    @property
    def lsb0(self) -> bool:
        """Whether the least significant bit (the final bit) is indexed as bit zero."""
        return options.lsb0

    @lsb0.setter
    def lsb0(self, value: bool) -> None:
        """Store the new lsb0 setting on the options object."""
        options.lsb0 = value

    @property
    def bytealigned(self) -> bool:
        """Whether a number of methods default to working only on byte boundaries."""
        return options.bytealigned

    @bytealigned.setter
    def bytealigned(self, value: bool) -> None:
        """Store the new bytealigned setting on the options object."""
        options.bytealigned = value
110
111
# Swap the class of this module object for _MyModuleType, so that the properties defined
# on that class (bytealigned, lsb0) are reachable as attributes of the module itself.
sys.modules[__name__].__class__ = _MyModuleType
113
"""These functions convert a bit length to the number of characters needed to print it for different interpretations."""
def hex_bits2chars(bitlength: int) -> int:
    """Return how many hexadecimal characters are printed for ``bitlength`` bits.

    Every character stands for 4 bits; any remainder is discarded.
    """
    whole_chars, _leftover_bits = divmod(bitlength, 4)
    return whole_chars
118
def oct_bits2chars(bitlength: int) -> int:
    """Return how many octal characters are printed for ``bitlength`` bits.

    Every character stands for 3 bits; any remainder is discarded.
    """
    whole_chars, _leftover_bits = divmod(bitlength, 3)
    return whole_chars
122
def bin_bits2chars(bitlength: int) -> int:
    """Return how many binary characters are printed for ``bitlength`` bits.

    Binary output uses exactly one character per bit, so the length is returned as-is.
    """
    return bitlength
126
def bytes_bits2chars(bitlength: int) -> int:
    """Return how many characters are printed for ``bitlength`` bits shown as bytes.

    Every character stands for 8 bits; any remainder is discarded.
    """
    whole_chars, _leftover_bits = divmod(bitlength, 8)
    return whole_chars
130
def uint_bits2chars(bitlength: int) -> int:
    """Return the number of characters in the largest unsigned int of ``bitlength`` bits.

    Narrow widths are measured exactly by converting ``2**bitlength - 1`` to a string.
    Wide widths are computed with a logarithm instead, because Python 3.11+ raises
    ValueError when an int-to-str conversion exceeds its digit limit (4300 digits by
    default), which the largest value of a wide enough field does.

    Raises:
        ValueError: if ``bitlength`` is negative (from the shift, as before).
    """
    import math

    # 2048 bits is 617 decimal digits, below the smallest non-zero digit limit (640)
    # that Python allows, so the exact conversion is always safe here.
    if bitlength <= 2048:
        return len(str((1 << bitlength) - 1))

    # 2**n is never a power of ten for n >= 1, so 2**n - 1 has exactly as many
    # decimal digits as 2**n, i.e. floor(n * log10(2)) + 1.
    return math.floor(bitlength * math.log10(2)) + 1
134
def int_bits2chars(bitlength: int) -> int:
    """Return the number of characters in the most negative signed int of ``bitlength`` bits.

    The most negative value, ``-(2**(bitlength - 1))``, is used so that the minus sign is
    counted. Narrow widths are measured exactly via string conversion. Wide widths are
    computed with a logarithm instead, because Python 3.11+ raises ValueError when an
    int-to-str conversion exceeds its digit limit (4300 digits by default).

    Raises:
        ValueError: if ``bitlength`` is less than 1 (from the shift, as before).
    """
    import math

    # 2048 bits is at most 617 decimal digits, below the smallest non-zero digit limit
    # (640) that Python allows, so the exact conversion is always safe here.
    if bitlength <= 2048:
        return len(str(-1 << (bitlength - 1)))

    # 2**(n-1) has floor((n-1) * log10(2)) + 1 decimal digits; one more for the sign.
    return math.floor((bitlength - 1) * math.log10(2)) + 2
138
def float_bits2chars(bitlength: Literal[16, 32, 64]) -> int:
    """Return the print width used for a float of the given bit length.

    Both widths are empirical, found by looking at lots of possible values:
    23 characters for 16 and 32 bit floats, 24 for anything else.
    """
    return 23 if bitlength in (16, 32) else 24
145
def p3binary_bits2chars(bitlength: Literal[8]) -> int:
    """Return the print width used for a p3binary value.

    The width is a fixed, empirical value; ``bitlength`` is not consulted.
    """
    return 19
148
def p4binary_bits2chars(bitlength: Literal[8]) -> int:
    """Return the print width used for a p4binary value.

    The width is a fixed, empirical value found by looking at all the possible values;
    ``bitlength`` is not consulted.
    """
    return 13
152
def e4m3mxfp_bits2chars(bitlength: Literal[8]) -> int:
    """Return the fixed print width used for an e4m3mxfp value; ``bitlength`` is not consulted."""
    return 13
155
def e5m2mxfp_bits2chars(bitlength: Literal[8]) -> int:
    """Return the fixed print width used for an e5m2mxfp value; ``bitlength`` is not consulted."""
    return 19
158
def e3m2mxfp_bits2chars(bitlength: Literal[6]) -> int:
    """Return the fixed print width used for an e3m2mxfp value; ``bitlength`` is not consulted.

    The best value is uncertain: 7 is the width without considering the scale that
    could be applied.
    """
    return 7
162
def e2m3mxfp_bits2chars(bitlength: Literal[6]) -> int:
    """Return the fixed print width used for an e2m3mxfp value; ``bitlength`` is not consulted.

    The best value is uncertain; 7 is the current choice.
    """
    return 7
166
def e2m1mxfp_bits2chars(bitlength: Literal[4]) -> int:
    """Return the fixed print width used for an e2m1mxfp value; ``bitlength`` is not consulted.

    The best value is uncertain; 7 is the current choice.
    """
    return 7
170
def e8m0mxfp_bits2chars(bitlength: Literal[8]) -> int:
    """Return the fixed print width used for an e8m0mxfp value; ``bitlength`` is not consulted.

    The format can range the same as float32, so the same width of 23 is used.
    """
    return 23
174
def mxint_bits2chars(bitlength: Literal[8]) -> int:
    """Return the fixed print width used for an mxint value; ``bitlength`` is not consulted.

    The best value is uncertain; 10 is the current choice.
    """
    return 10
178
179
def bfloat_bits2chars(bitlength: Literal[16]) -> int:
    """Return the print width used for a bfloat value.

    The width is a fixed, empirical value found by looking at all the possible values;
    ``bitlength`` is not consulted.
    """
    return 23
183
def bits_bits2chars(bitlength: int) -> int:
    """Return the number of characters needed to print a ``Bits`` of ``bitlength`` bits.

    The width is measured directly: a ``Bits`` is built from ``bitlength`` and the
    length of its string form is returned, on the basis that any value of that
    length will do.
    """
    return len(str(Bits(bitlength)))
188
def bool_bits2chars(bitlength: Literal[1]) -> int:
    """Return the print width of a bool, which is always a single character.

    Bools are printed as 1 or 0 rather than True or False; ``bitlength`` is not consulted.
    """
    return 1
192
# Table of the built-in dtypes. Every entry is handed to dtype_register.add_dtype further
# down this module.
# NOTE(review): the positional arguments appear to be (name, setter, getter, return type,
# signed flag, bits-to-chars function) - confirm against DtypeDefinition in .dtypes.
dtype_definitions = [
    # Integer types
    DtypeDefinition('uint', Bits._setuint, Bits._getuint, int, False, uint_bits2chars,
                    description="a two's complement unsigned int"),
    DtypeDefinition('uintle', Bits._setuintle, Bits._getuintle, int, False, uint_bits2chars,
                    allowed_lengths=(8, 16, 24, ...), description="a two's complement little-endian unsigned int"),
    DtypeDefinition('uintbe', Bits._setuintbe, Bits._getuintbe, int, False, uint_bits2chars,
                    allowed_lengths=(8, 16, 24, ...), description="a two's complement big-endian unsigned int"),
    DtypeDefinition('int', Bits._setint, Bits._getint, int, True, int_bits2chars,
                    description="a two's complement signed int"),
    DtypeDefinition('intle', Bits._setintle, Bits._getintle, int, True, int_bits2chars,
                    allowed_lengths=(8, 16, 24, ...), description="a two's complement little-endian signed int"),
    DtypeDefinition('intbe', Bits._setintbe, Bits._getintbe, int, True, int_bits2chars,
                    allowed_lengths=(8, 16, 24, ...), description="a two's complement big-endian signed int"),
    # String types
    DtypeDefinition('hex', Bits._sethex, Bits._gethex, str, False, hex_bits2chars,
                    allowed_lengths=(0, 4, 8, ...), description="a hexadecimal string"),
    DtypeDefinition('bin', Bits._setbin_safe, Bits._getbin, str, False, bin_bits2chars,
                    description="a binary string"),
    DtypeDefinition('oct', Bits._setoct, Bits._getoct, str, False, oct_bits2chars,
                    allowed_lengths=(0, 3, 6, ...), description="an octal string"),
    # Float types
    DtypeDefinition('float', Bits._setfloatbe, Bits._getfloatbe, float, True, float_bits2chars,
                    allowed_lengths=(16, 32, 64), description="a big-endian floating point number"),
    DtypeDefinition('floatle', Bits._setfloatle, Bits._getfloatle, float, True, float_bits2chars,
                    allowed_lengths=(16, 32, 64), description="a little-endian floating point number"),
    DtypeDefinition('bfloat', Bits._setbfloatbe, Bits._getbfloatbe, float, True, bfloat_bits2chars,
                    allowed_lengths=(16,), description="a 16 bit big-endian bfloat floating point number"),
    DtypeDefinition('bfloatle', Bits._setbfloatle, Bits._getbfloatle, float, True, bfloat_bits2chars,
                    allowed_lengths=(16,), description="a 16 bit little-endian bfloat floating point number"),
    # Other known length types
    DtypeDefinition('bits', Bits._setbits, Bits._getbits, Bits, False, bits_bits2chars,
                    description="a bitstring object"),
    DtypeDefinition('bool', Bits._setbool, Bits._getbool, bool, False, bool_bits2chars,
                    allowed_lengths=(1,), description="a bool (True or False)"),
    DtypeDefinition('bytes', Bits._setbytes, Bits._getbytes, bytes, False, bytes_bits2chars,
                    multiplier=8, description="a bytes object"),
    # Unknown length types (these pass None in place of a bits-to-chars function)
    DtypeDefinition('se', Bits._setse, Bits._getse, int, True, None,
                    variable_length=True, description="a signed exponential-Golomb code"),
    DtypeDefinition('ue', Bits._setue, Bits._getue, int, False, None,
                    variable_length=True, description="an unsigned exponential-Golomb code"),
    DtypeDefinition('sie', Bits._setsie, Bits._getsie, int, True, None,
                    variable_length=True, description="a signed interleaved exponential-Golomb code"),
    DtypeDefinition('uie', Bits._setuie, Bits._getuie, int, False, None,
                    variable_length=True, description="an unsigned interleaved exponential-Golomb code"),
    # Special case pad type (also passes None in place of a bits-to-chars function)
    DtypeDefinition('pad', Bits._setpad, Bits._getpad, None, False, None,
                    description="a skipped section of padding"),

    # MXFP and IEEE 8-bit float types
    DtypeDefinition('p3binary', Bits._setp3binary, Bits._getp3binary, float, True, p3binary_bits2chars,
                    allowed_lengths=(8,), description="an 8 bit float with binary8p3 format"),
    DtypeDefinition('p4binary', Bits._setp4binary, Bits._getp4binary, float, True, p4binary_bits2chars,
                    allowed_lengths=(8,), description="an 8 bit float with binary8p4 format"),
    DtypeDefinition('e4m3mxfp', Bits._sete4m3mxfp, Bits._gete4m3mxfp, float, True, e4m3mxfp_bits2chars,
                    allowed_lengths=(8,), description="an 8 bit float with MXFP E4M3 format"),
    DtypeDefinition('e5m2mxfp', Bits._sete5m2mxfp, Bits._gete5m2mxfp, float, True, e5m2mxfp_bits2chars,
                    allowed_lengths=(8,), description="an 8 bit float with MXFP E5M2 format"),
    DtypeDefinition('e3m2mxfp', Bits._sete3m2mxfp, Bits._gete3m2mxfp, float, True, e3m2mxfp_bits2chars,
                    allowed_lengths=(6,), description="a 6 bit float with MXFP E3M2 format"),
    DtypeDefinition('e2m3mxfp', Bits._sete2m3mxfp, Bits._gete2m3mxfp, float, True, e2m3mxfp_bits2chars,
                    allowed_lengths=(6,), description="a 6 bit float with MXFP E2M3 format"),
    DtypeDefinition('e2m1mxfp', Bits._sete2m1mxfp, Bits._gete2m1mxfp, float, True, e2m1mxfp_bits2chars,
                    allowed_lengths=(4,), description="a 4 bit float with MXFP E2M1 format"),
    DtypeDefinition('e8m0mxfp', Bits._sete8m0mxfp, Bits._gete8m0mxfp, float, False, e8m0mxfp_bits2chars,
                    allowed_lengths=(8,), description="an 8 bit float with MXFP E8M0 format"),
    DtypeDefinition('mxint', Bits._setmxint, Bits._getmxint, float, True, mxint_bits2chars,
                    allowed_lengths=(8,), description="an 8 bit float with MXFP INT8 format"),
]
263
264
# Pairs of names that are later passed, in this order, to dtype_register.add_dtype_alias.
aliases: List[Tuple[str, str]] = [
    # Big-endian is the default for the float types
    ('float', 'floatbe'),
    ('bfloat', 'bfloatbe'),

    # Single letter shorthands for the popular types
    ('int', 'i'),
    ('uint', 'u'),
    ('hex', 'h'),
    ('oct', 'o'),
    ('bin', 'b'),
    ('float', 'f'),
]

# The native-endian ('ne') names map onto whichever explicit-endian name matches the
# byteorder of the system: the 'le' names on a little-endian machine, the 'be' names otherwise.
byteorder: str = sys.byteorder
aliases.extend(
    (f"{base}{'le' if byteorder == 'little' else 'be'}", f'{base}ne')
    for base in ('uint', 'int', 'float', 'bfloat')
)
295
296
# Register every dtype first, then the aliases that refer to them.
for dt in dtype_definitions:
    dtype_register.add_dtype(dt)
for alias in aliases:
    dtype_register.add_dtype_alias(*alias)

# One description line per registered name, joined into a single block of text.
property_docstrings = [
    f'{name} -- Interpret as {dtype_register[name].description}.'
    for name in dtype_register.names
]
property_docstring = '\n '.join(property_docstrings)

# Substitute the generated text into each class docstring. A docstring may be missing
# altogether (the code might be compiled without docstrings), in which case the class is skipped.
for _documented_class in (Bits, BitArray, ConstBitStream, BitStream):
    if _documented_class.__doc__ is None:
        continue
    _documented_class.__doc__ = _documented_class.__doc__.replace(
        '[GENERATED_PROPERTY_DESCRIPTIONS]', property_docstring)
del _documented_class
314
315
# The names exported by a star-import of this package.
__all__ = ['ConstBitStream', 'BitStream', 'BitArray', 'Array',
           'Bits', 'pack', 'Error', 'ReadError', 'InterpretError',
           'ByteAlignError', 'CreationError', 'bytealigned', 'lsb0', 'Dtype', 'options']