Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bitstring/utils.py: 45%
175 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:15 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:15 +0000
1from __future__ import annotations
2import itertools
3import functools
4import re
5from typing import Tuple, List, Optional, Pattern, Dict, Union, Match
6import sys
7from bitstring.exceptions import Error
# Endianness of the machine we're running on ('little' or 'big').
byteorder: str = sys.byteorder

# Size of the lru_cache used by the token-parsing helpers below.
CACHE_SIZE = 256

SIGNED_INTEGER_NAMES: List[str] = ['int', 'se', 'sie', 'intbe', 'intle', 'intne']
UNSIGNED_INTEGER_NAMES: List[str] = ['uint', 'ue', 'uie', 'uintbe', 'uintle', 'uintne', 'bool']
FLOAT_NAMES: List[str] = ['float', 'floatbe', 'floatle', 'floatne', 'bfloatbe', 'bfloatle', 'bfloatne', 'bfloat', 'float8_143', 'float8_152']
STRING_NAMES: List[str] = ['hex', 'oct', 'bin']

# All initialiser names, longest first so that alternation in TOKEN_RE matches
# 'floatne' before 'float', 'uintbe' before 'uint', etc. (sorted() is stable).
INIT_NAMES: List[str] = sorted(
    SIGNED_INTEGER_NAMES + UNSIGNED_INTEGER_NAMES + FLOAT_NAMES + STRING_NAMES + ['bits', 'bytes', 'pad'],
    key=len, reverse=True)

# Full-form tokens such as 'uint:12=4' or 'hex=ff'.
TOKEN_RE: Pattern[str] = re.compile(
    r'^(?P<name>' + '|'.join(INIT_NAMES) + r'):?(?P<len>[^=]+)?(=(?P<value>.*))?$',
    re.IGNORECASE)
# Tokens such as 'u32', 'f64=4.5' or 'i6=-3'
SHORT_TOKEN_RE: Pattern[str] = re.compile(r'^(?P<name>[uifboh]):?(?P<len>\d+)?(=(?P<value>.*))?$')
# Fallback for a token with no name: just a length and optional value.
DEFAULT_BITS: Pattern[str] = re.compile(r'^(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)

# A string followed by optional : then an integer number
STR_INT_RE: Pattern[str] = re.compile(r'^(?P<string>.+?):?(?P<integer>\d*)$')

# 'N*token' multiplicative syntax.
MULTIPLICATIVE_RE: Pattern[str] = re.compile(r'^(?P<factor>.*)\*(?P<token>.+)')

# Hex, oct or binary literals
LITERAL_RE: Pattern[str] = re.compile(r'^(?P<name>0([xob]))(?P<value>.+)', re.IGNORECASE)

# An endianness indicator followed by one or more struct.pack codes
STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=]){1}(?P<fmt>(?:\d*[bBhHlLqQefd])+)$')
# The same as above, but it doesn't insist on an endianness as it's byteswapping anyway.
BYTESWAP_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])?(?P<fmt>(?:\d*[bBhHlLqQefd])+)$')
# An endianness indicator followed by exactly one struct.pack code
SINGLE_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=]){1}(?P<fmt>(?:[bBhHlLqQefd]))$')

# A number followed by a single character struct.pack code
STRUCT_SPLIT_RE: Pattern[str] = re.compile(r'\d*[bBhHlLqQefd]')


def _struct_replacements(suffix: str) -> Dict[str, str]:
    """Map each single struct.pack code to its bitstring token for one endianness.

    The single-byte codes 'b'/'B' have no endianness, so they take no suffix.
    """
    return {'b': 'int:8', 'B': 'uint:8',
            'h': f'int{suffix}:16', 'H': f'uint{suffix}:16',
            'l': f'int{suffix}:32', 'L': f'uint{suffix}:32',
            'q': f'int{suffix}:64', 'Q': f'uint{suffix}:64',
            'e': f'float{suffix}:16', 'f': f'float{suffix}:32', 'd': f'float{suffix}:64'}


# These replicate the struct.pack codes for big-, little- and native-endian.
REPLACEMENTS_BE: Dict[str, str] = _struct_replacements('be')
REPLACEMENTS_LE: Dict[str, str] = _struct_replacements('le')
REPLACEMENTS_NE: Dict[str, str] = _struct_replacements('ne')

# Tokens which are always the same length, so it doesn't need to be supplied.
FIXED_LENGTH_TOKENS: Dict[str, int] = {'bool': 1,
                                       'bfloat': 16,
                                       'float8_143': 8,
                                       'float8_152': 8}
def structparser(m: Match[str]) -> List[str]:
    """Parse struct-like format string token into sub-token list.

    *m* is a match of STRUCT_PACK_RE, so it has an 'endian' group (one of
    '<', '>', '@', '=') and a 'fmt' group of struct.pack codes such as 'q4h'.
    """
    endianness = m.group('endian')
    # Pick the replacement table for the requested endianness.
    if endianness == '<':
        replacements = REPLACEMENTS_LE
    elif endianness == '>':
        replacements = REPLACEMENTS_BE
    else:
        # '@' and '=' both mean native endianness here.
        assert endianness in '@='
        replacements = REPLACEMENTS_NE
    tokens: List[str] = []
    # Split the format into chunks like 'q' or '4h', expanding any
    # multiplicative prefix ('4h' contributes four 'h' tokens).
    for chunk in re.findall(STRUCT_SPLIT_RE, m.group('fmt')):
        code = chunk[-1]
        count = int(chunk[:-1]) if len(chunk) > 1 else 1
        tokens.extend([replacements[code]] * count)
    return tokens
@functools.lru_cache(CACHE_SIZE)
def parse_name_length_token(fmt: str) -> Tuple[str, int]:
    """Parse a single 'name[:]length' token and return (name, length_in_bits).

    Accepts a struct.pack-style code with an endianness prefix (e.g. '>H'),
    a single-character abbreviation ('u12' -> ('uint', 12)) or a full name
    ('uint:12'). A missing length is returned as 0, except for fixed-length
    token types where the known length is substituted.

    Raises ValueError if the token can't be parsed, if an exponential-Golomb
    name is given (they have no fixed length), or if a fixed-length token is
    given a conflicting length.
    """
    # Expand a single struct.pack token such as '>H' into its bitstring form.
    m = SINGLE_STRUCT_PACK_RE.match(fmt)
    if m:
        endian = m.group('endian')
        f = m.group('fmt')
        if endian == '>':
            fmt = REPLACEMENTS_BE[f]
        elif endian == '<':
            fmt = REPLACEMENTS_LE[f]
        else:
            assert endian in '=@'
            fmt = REPLACEMENTS_NE[f]
    m2 = STR_INT_RE.match(fmt)
    if not m2:
        raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.")
    name = m2.group('string')
    length_str = m2.group('integer')
    length = 0 if length_str == '' else int(length_str)
    # Expand single-character abbreviations; anything else is left unchanged.
    # (Using .get avoids the previous substring test `name in 'uifboh'`, which
    # let multi-character names like 'if' or 'bo' through to a KeyError.)
    name = {'u': 'uint',
            'i': 'int',
            'f': 'float',
            'b': 'bin',
            'o': 'oct',
            'h': 'hex'}.get(name, name)
    if name in ('se', 'ue', 'sie', 'uie'):
        # This function must return a concrete length, which these codes don't have.
        raise ValueError(
            f"Exponential-Golomb codes (se/ue/sie/uie) can't have fixed lengths. Length of {length} was given.")
    if name == 'float8_':
        # 'float8_143' / 'float8_152': the trailing digits are part of the name,
        # and both formats are 8 bits long.
        name += str(length)
        length = 8
    if name in FIXED_LENGTH_TOKENS:
        token_length = FIXED_LENGTH_TOKENS[name]
        # Length may be omitted (0) or must agree with the fixed length.
        if length not in (0, token_length):
            raise ValueError(f"{name} tokens can only be {token_length} bits long, not {length} bits.")
        length = token_length
    return name, length
@functools.lru_cache(CACHE_SIZE)
def parse_single_token(token: str) -> Tuple[str, str, Optional[str]]:
    """Split one format token into its (name, length, value) string parts.

    Tries the full-name pattern first, then the single-character short form,
    and finally falls back to a plain 'bits' token. Fixed-length token types
    have their known length filled in (and checked if one was supplied).
    """
    abbreviations = {'u': 'uint',
                     'i': 'int',
                     'f': 'float',
                     'b': 'bin',
                     'o': 'oct',
                     'h': 'hex'}
    full = TOKEN_RE.match(token)
    if full:
        name = full.group('name')
        length = full.group('len')
        value = full.group('value')
    else:
        short = SHORT_TOKEN_RE.match(token)
        if short:
            name = abbreviations[short.group('name')]
            length = short.group('len')
            value = short.group('value')
        else:
            # If you don't specify a 'name' then the default is 'bits':
            default = DEFAULT_BITS.match(token)
            if default is None:
                raise ValueError(f"Don't understand token '{token}'.")
            name = 'bits'
            length = default.group('len')
            value = default.group('value')

    if name in FIXED_LENGTH_TOKENS:
        # Lengths are still strings at this stage, so compare as strings.
        fixed = str(FIXED_LENGTH_TOKENS[name])
        if length is not None and length != fixed:
            raise ValueError(f"{name} tokens can only be {fixed} bits long, not {length} bits.")
        length = fixed

    return name, length, value
@functools.lru_cache(CACHE_SIZE)
def tokenparser(fmt: str, keys: Tuple[str, ...] = ()) -> \
        Tuple[bool, List[Tuple[str, Union[int, str, None], Optional[str]]]]:
    """Divide the format string into tokens and parse them.

    Return stretchy token and list of [initialiser, length, value]
    initialiser is one of: hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc.
    length is None if not known, as is value.

    If the token is in the keyword dictionary (keys) then it counts as a
    special case and isn't messed with.

    tokens must be of the form: [factor*][initialiser][:][length][=value]

    """
    # Remove whitespace
    fmt = ''.join(fmt.split())
    # Expand any brackets.
    fmt = expand_brackets(fmt)
    # Split tokens by ',' and remove whitespace
    # The meta_tokens can either be ordinary single tokens or multiple
    # struct-format token strings.
    meta_tokens = [f.strip() for f in fmt.split(',')]
    return_values: List[Tuple[str, Union[int, str, None], Optional[str]]] = []
    # True once any token has been seen with no length — it will stretch to
    # take whatever bits remain.
    stretchy_token = False
    for meta_token in meta_tokens:
        # See if it has a multiplicative factor
        m = MULTIPLICATIVE_RE.match(meta_token)
        if not m:
            factor = 1
        else:
            factor = int(m.group('factor'))
            meta_token = m.group('token')
        # See if it's a struct-like format
        m = STRUCT_PACK_RE.match(meta_token)
        if m:
            # One struct format expands to several ordinary tokens.
            tokens = structparser(m)
        else:
            tokens = [meta_token]
        ret_vals: List[Tuple[str, Union[str, int, None], Optional[str]]] = []
        for token in tokens:
            if keys and token in keys:
                # Don't bother parsing it, it's a keyword argument
                ret_vals.append((token, None, None))
                continue
            if token == '':
                continue
            # Match literal tokens of the form 0x... 0o... and 0b...
            m = LITERAL_RE.match(token)
            if m:
                ret_vals.append((m.group('name'), None, m.group('value')))
                continue
            name, length, value = parse_single_token(token)
            if name in ('se', 'ue', 'sie', 'uie'):
                # Exponential-Golomb codes are self-delimiting, so a supplied
                # length is an error rather than a stretchy token.
                if length is not None:
                    raise ValueError(f"Exponential-Golomb codes (se/ue/sie/uie) can't have fixed lengths. Length of {length} was given.")
            else:
                if length is None:
                    stretchy_token = True
            if length is not None:
                # Try converting length to int, otherwise check it's a key.
                try:
                    length = int(length)
                    if length < 0:
                        # Internal signal: re-raised below as a clearer ValueError.
                        raise Error
                    # For the 'bytes' token convert length to bits.
                    if name == 'bytes':
                        length *= 8
                except Error:
                    # NOTE: this handler must come before the ValueError one so
                    # the negative-length signal isn't swallowed by it.
                    raise ValueError("Can't read a token with a negative length.")
                except ValueError:
                    # int() failed — the 'length' may instead be the name of a
                    # keyword argument supplied in keys.
                    if not keys or length not in keys:
                        raise ValueError(f"Don't understand length '{length}' of token.")
            ret_vals.append((name, length, value))
        # Repeat the whole parsed group factor times (the 'N*token' syntax).
        # ret_vals is never mutated after this point, so repeating the same
        # list object is safe.
        return_values.extend(itertools.repeat(ret_vals, factor))
    # Flatten the per-meta-token lists into one flat token list.
    return_values = itertools.chain.from_iterable(return_values)
    return stretchy_token, list(return_values)
def expand_brackets(s: str) -> str:
    """Expand all brackets.

    A plain '(...)' group has its parentheses removed; an 'N*(...)' group is
    replaced by N comma-separated copies of its contents. Repeats until no
    '(' remains. Raises ValueError on unbalanced or unparseable input.
    """
    # Matches the first 'N*(' occurrence anywhere in the string.
    factor_re = re.compile(r'(?P<factor>\d+)\*\(')
    while True:
        open_pos = s.find('(')
        if open_pos == -1:
            return s
        # Scan forward for the matching close bracket, tracking nesting depth.
        depth = 1
        close_pos = open_pos + 1
        while close_pos < len(s):
            char = s[close_pos]
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    break
            close_pos += 1
        if depth != 0:
            raise ValueError(f"Unbalanced parenthesis in '{s}'.")
        inner = s[open_pos + 1:close_pos]
        tail = s[close_pos + 1:]
        if open_pos == 0 or s[open_pos - 1] != '*':
            # No multiplier: just strip the brackets.
            s = s[:open_pos] + inner + tail
        else:
            # Find the first number*( and duplicate the group that many times.
            m = factor_re.search(s)
            if m is None:
                raise ValueError(f"Failed to parse '{s}'.")
            factor = int(m.group('factor'))
            s = s[:m.start('factor')] + ','.join([inner] * factor) + tail