Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/bitstring/utils.py: 26%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import functools
4import re
5from typing import Tuple, List, Optional, Pattern, Dict, Union, Match
# A token name followed by an optional ':' then an integer length, e.g. 'uint:12' or 'uint12'.
NAME_INT_RE: Pattern[str] = re.compile(r'^([a-zA-Z][a-zA-Z0-9_]*?):?(\d*)$')

# A token name followed by an optional ':' then an arbitrary keyword, e.g. 'uint:width'.
NAME_KWARG_RE: Pattern[str] = re.compile(r'^([a-zA-Z][a-zA-Z0-9_]*?):?([a-zA-Z0-9_]+)$')

# Maximum number of entries kept by each functools.lru_cache in this module.
CACHE_SIZE = 256

# An optional length followed by an optional '=value'.
# NOTE(review): not referenced by any function in this module — presumably used
# elsewhere in the package; confirm before removing.
DEFAULT_BITS: Pattern[str] = re.compile(r'^(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)

# A multiplicative factor before a token, e.g. '3*uint:8'.
MULTIPLICATIVE_RE: Pattern[str] = re.compile(r'^(?P<factor>.*)\*(?P<token>.+)')

# Hex, oct or binary literals, e.g. '0xff', '0o17', '0b101'.
LITERAL_RE: Pattern[str] = re.compile(r'^(?P<name>0([xob]))(?P<value>.+)', re.IGNORECASE)

# An endianness indicator followed by one or more struct.pack codes, e.g. '>2hql'.
STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])(?P<fmt>(?:\d*[bBhHlLiIqQefd])+)$')
# The same as above, but it doesn't insist on an endianness as it's byteswapping anyway.
BYTESWAP_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])?(?P<fmt>(?:\d*[bBhHlLiIqQefd])+)$')
# An endianness indicator followed by exactly one struct.pack code.
SINGLE_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])(?P<fmt>[bBhHlLiIqQefd])$')

# An optional repeat count followed by a single character struct.pack code, e.g. '4h'.
STRUCT_SPLIT_RE: Pattern[str] = re.compile(r'\d*[bBhHlLiIqQefd]')

# These replicate the struct.pack codes. As in struct, both 'l'/'L' and
# 'i'/'I' map to 32-bit types.
# Big-endian
REPLACEMENTS_BE: Dict[str, str] = {'b': 'int8', 'B': 'uint8',
                                   'h': 'intbe16', 'H': 'uintbe16',
                                   'l': 'intbe32', 'L': 'uintbe32',
                                   'i': 'intbe32', 'I': 'uintbe32',
                                   'q': 'intbe64', 'Q': 'uintbe64',
                                   'e': 'floatbe16', 'f': 'floatbe32', 'd': 'floatbe64'}
# Little-endian
REPLACEMENTS_LE: Dict[str, str] = {'b': 'int8', 'B': 'uint8',
                                   'h': 'intle16', 'H': 'uintle16',
                                   'l': 'intle32', 'L': 'uintle32',
                                   'i': 'intle32', 'I': 'uintle32',
                                   'q': 'intle64', 'Q': 'uintle64',
                                   'e': 'floatle16', 'f': 'floatle32', 'd': 'floatle64'}
# Native-endian
REPLACEMENTS_NE: Dict[str, str] = {'b': 'int8', 'B': 'uint8',
                                   'h': 'intne16', 'H': 'uintne16',
                                   'l': 'intne32', 'L': 'uintne32',
                                   'i': 'intne32', 'I': 'uintne32',
                                   'q': 'intne64', 'Q': 'uintne64',
                                   'e': 'floatne16', 'f': 'floatne32', 'd': 'floatne64'}

# Size in bytes of each struct.pack code.
PACK_CODE_SIZE: Dict[str, int] = {'b': 1, 'B': 1, 'h': 2, 'H': 2, 'l': 4, 'L': 4, 'i': 4, 'I': 4,
                                  'q': 8, 'Q': 8, 'e': 2, 'f': 4, 'd': 8}
def structparser(m: Match[str]) -> List[str]:
    """Convert a matched struct.pack-style format into a list of token names.

    `m` must provide an 'endian' group (one of '<>@=') and a 'fmt' group
    containing struct codes with optional repeat counts, e.g. '4h'.
    """
    endianness = m.group('endian')
    # Break the format into pieces such as 'q', '4h', ...
    pieces = STRUCT_SPLIT_RE.findall(m.group('fmt'))
    # Expand any repeat counts: '4h' -> 'hhhh'.
    expanded = []
    for piece in pieces:
        if len(piece) == 1:
            expanded.append(piece)
        else:
            expanded.append(piece[-1] * int(piece[:-1]))
    codes = ''.join(expanded)
    # Pick the replacement table matching the requested endianness.
    if endianness in '@=':
        replacements = REPLACEMENTS_NE  # Native endianness
    elif endianness == '<':
        replacements = REPLACEMENTS_LE
    else:
        assert endianness == '>'
        replacements = REPLACEMENTS_BE
    return [replacements[code] for code in codes]
@functools.lru_cache(CACHE_SIZE)
def parse_name_length_token(fmt: str, **kwargs) -> Tuple[str, Optional[int]]:
    """Parse a single 'name[:]length' token into a (name, length) pair.

    The length may be given directly as an integer ('uint:12'), be absent
    (length is returned as None), or be a keyword ('uint:width') whose value
    is looked up in **kwargs.

    Raises ValueError if the token can't be parsed or the keyword is missing.
    """
    # Name with an optional integer length, e.g. 'uint:12', 'uint12' or 'bool'.
    if m_int := NAME_INT_RE.match(fmt):
        name = m_int.group(1)
        length_str = m_int.group(2)
        return name, (None if length_str == '' else int(length_str))
    # Name with a keyword length, e.g. 'uint:width' -- look the keyword up in kwargs.
    if m_kwarg := NAME_KWARG_RE.match(fmt):
        name = m_kwarg.group(1)
        try:
            length_str = kwargs[m_kwarg.group(2)]
        except KeyError:
            # 'from None' suppresses the internal KeyError so the caller sees a
            # single clear parse error instead of a chained traceback (PEP 409).
            raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.") from None
        return name, int(length_str)
    raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.")
@functools.lru_cache(CACHE_SIZE)
def parse_single_struct_token(fmt: str) -> Optional[Tuple[str, Optional[int]]]:
    """Convert a single struct.pack token like '>H' to a (name, length) pair.

    Returns None if `fmt` isn't an endianness indicator plus one struct code.
    """
    m = SINGLE_STRUCT_PACK_RE.match(fmt)
    if m is None:
        return None
    code = m.group('fmt')
    endianness = m.group('endian')
    # Translate the struct code via the table for the given endianness.
    if endianness == '>':
        token = REPLACEMENTS_BE[code]
    elif endianness == '<':
        token = REPLACEMENTS_LE[code]
    else:
        assert endianness in '=@'
        token = REPLACEMENTS_NE[code]
    return parse_name_length_token(token)
@functools.lru_cache(CACHE_SIZE)
def parse_single_token(token: str) -> Tuple[str, str, Optional[str]]:
    """Split one token into (name, length, value) strings.

    The length is left as a string (it may be a keyword rather than digits)
    and is None when absent, as is the value.
    """
    # Separate off an optional '=value' suffix first.
    name_part, equals, value_part = token.partition('=')
    value = value_part if equals else None
    if m_int := NAME_INT_RE.match(name_part):
        # Name with an optional integer length.
        name = m_int.group(1)
        length = m_int.group(2) or None
    elif m_kwarg := NAME_KWARG_RE.match(name_part):
        # Name then a keyword for a length.
        name = m_kwarg.group(1)
        length = m_kwarg.group(2)
    else:
        # If you don't specify a 'name' then the default is 'bits'.
        name = 'bits'
        length = name_part
    return name, length, value
@functools.lru_cache(CACHE_SIZE)
def preprocess_tokens(fmt: str) -> List[str]:
    """Strip whitespace, expand brackets and multiplicative factors, and
    return the format string as a flat list of single-token strings."""
    # Remove all whitespace, then expand any bracketed groups.
    fmt = expand_brackets(''.join(fmt.split()))
    # Comma-separated pieces are either ordinary single tokens or
    # multi-code struct-format strings.
    final_tokens: List[str] = []
    for meta_token in (piece.strip() for piece in fmt.split(',')):
        if not meta_token:
            continue
        # Peel off a multiplicative factor if present, e.g. '3*uint:8'.
        factor = 1
        if factor_match := MULTIPLICATIVE_RE.match(meta_token):
            factor = int(factor_match.group('factor'))
            meta_token = factor_match.group('token')
        # A struct-format string expands to several sub-tokens;
        # anything else is a single token.
        struct_match = STRUCT_PACK_RE.match(meta_token)
        sub_tokens = structparser(struct_match) if struct_match else [meta_token]
        final_tokens.extend(sub_tokens * factor)
    return final_tokens
@functools.lru_cache(CACHE_SIZE)
def tokenparser(fmt: str, keys: Tuple[str, ...] = ()) -> \
        Tuple[bool, List[Tuple[str, Union[int, str, None], Optional[str]]]]:
    """Divide the format string into tokens and parse them.

    Return stretchy token and list of [initialiser, length, value]
    initialiser is one of: hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc.
    length is None if not known, as is value.

    If the token is in the keyword dictionary (keys) then it counts as a
    special case and isn't messed with.

    tokens must be of the form: [factor*][initialiser][:][length][=value]
    """
    parsed: List[Tuple[str, Union[str, int, None], Optional[str]]] = []
    has_stretchy_token = False
    for token in preprocess_tokens(fmt):
        if keys and token in keys:
            # It's a keyword argument -- pass it through unparsed.
            parsed.append((token, None, None))
            continue
        if not token:
            continue
        # Literal tokens of the form 0x..., 0o... and 0b...
        literal_match = LITERAL_RE.match(token)
        if literal_match:
            parsed.append((literal_match.group('name'), None, literal_match.group('value')))
            continue
        name, length, value = parse_single_token(token)
        if length is None:
            # No length given: the token stretches to fill available bits.
            has_stretchy_token = True
        else:
            # Try converting length to int, otherwise check it's a key.
            try:
                length = int(length)
            except ValueError:
                if not keys or length not in keys:
                    raise ValueError(f"Don't understand length '{length}' of token.")
        parsed.append((name, length, value))
    return has_stretchy_token, parsed
# Looks for the first 'number*(' in a format string.
BRACKET_RE = re.compile(r'(?P<factor>\d+)\*\(')


def expand_brackets(s: str) -> str:
    """Expand all bracketed groups, e.g. '2*(a,b)' -> 'a,b,a,b'.

    Raises ValueError on unbalanced or otherwise unparseable brackets.
    """
    while (start := s.find('(')) != -1:
        # Scan forward for the matching close bracket.
        depth = 1  # Number of hanging open brackets
        pos = start + 1
        while pos < len(s):
            depth += (s[pos] == '(') - (s[pos] == ')')
            if depth == 0:
                break
            pos += 1
        if depth != 0:
            raise ValueError(f"Unbalanced parenthesis in '{s}'.")
        inner = s[start + 1:pos]
        if start == 0 or s[start - 1] != '*':
            # Plain (non-multiplied) brackets: just remove them.
            s = s[:start] + inner + s[pos + 1:]
            continue
        # Multiplied brackets: find the first 'number*(' and replicate
        # the bracket contents that many times, comma-separated.
        m = BRACKET_RE.search(s)
        if m is None:
            raise ValueError(f"Failed to parse '{s}'.")
        factor = int(m.group('factor'))
        s = s[:m.start('factor')] + (factor - 1) * (inner + ',') + inner + s[pos + 1:]
    return s