Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/bitstring/utils.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

137 statements  

from __future__ import annotations

import functools
import re
from typing import Tuple, List, Optional, Pattern, Dict, Union, Match

# A token name followed by optional : then an integer number
NAME_INT_RE: Pattern[str] = re.compile(r'^([a-zA-Z][a-zA-Z0-9_]*?):?(\d*)$')

# A token name followed by optional : then an arbitrary keyword
NAME_KWARG_RE: Pattern[str] = re.compile(r'^([a-zA-Z][a-zA-Z0-9_]*?):?([a-zA-Z0-9_]+)$')

# Maximum number of entries kept by each lru_cache in this module.
CACHE_SIZE = 256

# Optional length, optionally followed by '=value' (used for default 'bits' tokens).
DEFAULT_BITS: Pattern[str] = re.compile(r'^(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)

# A multiplicative factor before a token, e.g. '3*uint:8'.
MULTIPLICATIVE_RE: Pattern[str] = re.compile(r'^(?P<factor>.*)\*(?P<token>.+)')

# Hex, oct or binary literals
LITERAL_RE: Pattern[str] = re.compile(r'^(?P<name>0([xob]))(?P<value>.+)', re.IGNORECASE)

# An endianness indicator followed by one or more struct.pack codes
STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])(?P<fmt>(?:\d*[bBhHlLiIqQefd])+)$')
# The same as above, but it doesn't insist on an endianness as it's byteswapping anyway.
BYTESWAP_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])?(?P<fmt>(?:\d*[bBhHlLiIqQefd])+)$')
# An endianness indicator followed by exactly one struct.pack code
SINGLE_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])(?P<fmt>[bBhHlLiIqQefd])$')

# A number followed by a single character struct.pack code
STRUCT_SPLIT_RE: Pattern[str] = re.compile(r'\d*[bBhHlLiIqQefd]')

# These replicate the struct.pack codes
# Big-endian
REPLACEMENTS_BE: Dict[str, str] = {'b': 'int8', 'B': 'uint8',
                                   'h': 'intbe16', 'H': 'uintbe16',
                                   'l': 'intbe32', 'L': 'uintbe32',
                                   'i': 'intbe32', 'I': 'uintbe32',
                                   'q': 'intbe64', 'Q': 'uintbe64',
                                   'e': 'floatbe16', 'f': 'floatbe32', 'd': 'floatbe64'}
# Little-endian
REPLACEMENTS_LE: Dict[str, str] = {'b': 'int8', 'B': 'uint8',
                                   'h': 'intle16', 'H': 'uintle16',
                                   'l': 'intle32', 'L': 'uintle32',
                                   'i': 'intle32', 'I': 'uintle32',
                                   'q': 'intle64', 'Q': 'uintle64',
                                   'e': 'floatle16', 'f': 'floatle32', 'd': 'floatle64'}

# Native-endian
REPLACEMENTS_NE: Dict[str, str] = {'b': 'int8', 'B': 'uint8',
                                   'h': 'intne16', 'H': 'uintne16',
                                   'l': 'intne32', 'L': 'uintne32',
                                   'i': 'intne32', 'I': 'uintne32',
                                   'q': 'intne64', 'Q': 'uintne64',
                                   'e': 'floatne16', 'f': 'floatne32', 'd': 'floatne64'}

# Size in bytes of all the pack codes.
PACK_CODE_SIZE: Dict[str, int] = {'b': 1, 'B': 1, 'h': 2, 'H': 2, 'l': 4, 'L': 4, 'i': 4, 'I': 4,
                                  'q': 8, 'Q': 8, 'e': 2, 'f': 4, 'd': 8}

60 

61 

def structparser(m: Match[str]) -> List[str]:
    """Parse struct-like format string token into sub-token list.

    The match `m` must provide 'endian' ('<', '>', '@' or '=') and 'fmt'
    (a run of struct.pack codes, each with an optional count) groups.
    Returns one bitstring token name per expanded code.
    """
    endian = m.group('endian')
    # Break the format into units such as 'q' or '4h'.
    units = STRUCT_SPLIT_RE.findall(m.group('fmt'))
    # Apply the counts: '4h' expands to 'hhhh'.
    expanded = []
    for unit in units:
        if len(unit) == 1:
            expanded.append(unit)
        else:
            expanded.append(unit[-1] * int(unit[:-1]))
    codes = ''.join(expanded)
    # Pick the replacement table matching the endianness marker.
    if endian in '@=':
        # '@' and '=' both select native endianness.
        table = REPLACEMENTS_NE
    elif endian == '<':
        table = REPLACEMENTS_LE
    else:
        assert endian == '>'
        table = REPLACEMENTS_BE
    return [table[code] for code in codes]

79 

80 

@functools.lru_cache(CACHE_SIZE)
def parse_name_length_token(fmt: str, **kwargs) -> Tuple[str, Optional[int]]:
    """Split a 'name[:]length' token into its name and integer length.

    The length may be omitted (returned as None) or given as a keyword
    whose integer value is looked up in kwargs.
    Raises ValueError if the token can't be parsed.
    """
    # First try a name with an optional literal integer length.
    int_match = NAME_INT_RE.match(fmt)
    if int_match is not None:
        name = int_match.group(1)
        digits = int_match.group(2)
        return name, (int(digits) if digits != '' else None)
    # Otherwise the length may be a keyword to resolve via kwargs.
    kwarg_match = NAME_KWARG_RE.match(fmt)
    if kwarg_match is not None:
        name = kwarg_match.group(1)
        try:
            length_str = kwargs[kwarg_match.group(2)]
        except KeyError:
            raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.")
        return name, int(length_str)
    raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.")

100 

101 

@functools.lru_cache(CACHE_SIZE)
def parse_single_struct_token(fmt: str) -> Optional[Tuple[str, Optional[int]]]:
    """Interpret fmt as one endianness marker plus one struct.pack code.

    Returns the (name, length) pair of the equivalent bitstring token,
    or None if fmt isn't a single struct-style code.
    """
    m = SINGLE_STRUCT_PACK_RE.match(fmt)
    if m is None:
        return None
    # Map the code through the replacement table for its endianness
    # ('=' and '@' both mean native).
    tables = {'>': REPLACEMENTS_BE,
              '<': REPLACEMENTS_LE,
              '=': REPLACEMENTS_NE,
              '@': REPLACEMENTS_NE}
    token = tables[m.group('endian')][m.group('fmt')]
    return parse_name_length_token(token)

117 

118 

@functools.lru_cache(CACHE_SIZE)
def parse_single_token(token: str) -> Tuple[str, str, Optional[str]]:
    """Split a single token string into (name, length, value) parts.

    length and value are returned as strings (or None if absent).
    A token with no recognisable name defaults to the 'bits' initialiser.
    """
    # Peel off an '=value' suffix, splitting at the first '='.
    before, sep, after = token.partition('=')
    value = after if sep else None
    token = before

    int_match = NAME_INT_RE.match(token)
    if int_match:
        # Name with an optional literal integer length.
        name = int_match.group(1)
        length = int_match.group(2) or None
        return name, length, value
    kwarg_match = NAME_KWARG_RE.match(token)
    if kwarg_match:
        # Name followed by a keyword standing in for the length.
        return kwarg_match.group(1), kwarg_match.group(2), value
    # If you don't specify a 'name' then the default is 'bits'.
    return 'bits', token, value

140 

141 

@functools.lru_cache(CACHE_SIZE)
def preprocess_tokens(fmt: str) -> List[str]:
    """Normalise a format string into a flat list of single-token strings.

    Whitespace is stripped, bracketed groups are expanded, multiplicative
    factors are applied and struct-style strings are split into sub-tokens.
    """
    # Remove all whitespace, then expand any '(...)' groups.
    fmt = expand_brackets(''.join(fmt.split()))

    final_tokens: List[str] = []
    # Each comma-separated piece is either a single token or a
    # struct-format string covering several tokens.
    for piece in (part.strip() for part in fmt.split(',')):
        if not piece:
            continue
        # A leading 'N*' repeats the whole piece N times.
        repeat = 1
        mult_match = MULTIPLICATIVE_RE.match(piece)
        if mult_match:
            repeat = int(mult_match.group('factor'))
            piece = mult_match.group('token')

        # Struct-style pieces expand to several sub-tokens; anything else
        # is kept as-is.
        struct_match = STRUCT_PACK_RE.match(piece)
        sub_tokens = structparser(struct_match) if struct_match else [piece]

        final_tokens.extend(sub_tokens * repeat)
    return final_tokens

167 

168 

@functools.lru_cache(CACHE_SIZE)
def tokenparser(fmt: str, keys: Tuple[str, ...] = ()) -> \
        Tuple[bool, List[Tuple[str, Union[int, str, None], Optional[str]]]]:
    """Divide the format string into tokens and parse them.

    Return stretchy token and list of [initialiser, length, value]
    initialiser is one of: hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc.
    length is None if not known, as is value.

    If the token is in the keyword dictionary (keys) then it counts as a
    special case and isn't messed with.

    tokens must be of the form: [factor*][initialiser][:][length][=value]

    """
    stretchy_token = False
    parsed: List[Tuple[str, Union[str, int, None], Optional[str]]] = []
    for token in preprocess_tokens(fmt):
        # Keyword arguments pass straight through unparsed.
        if keys and token in keys:
            parsed.append((token, None, None))
            continue
        if not token:
            continue
        # 0x..., 0o... and 0b... literals carry their value directly.
        literal = LITERAL_RE.match(token)
        if literal:
            parsed.append((literal.group('name'), None, literal.group('value')))
            continue
        name, length, value = parse_single_token(token)
        if length is None:
            # No length: the token stretches to fill the available bits.
            stretchy_token = True
        else:
            # Try converting length to int, otherwise check it's a key.
            try:
                length = int(length)
            except ValueError:
                if not keys or length not in keys:
                    raise ValueError(f"Don't understand length '{length}' of token.")
        parsed.append((name, length, value))
    return stretchy_token, parsed

210 

211 

# Matches a 'number*(' prefix, used to find the factor owning a bracket group.
BRACKET_RE = re.compile(r'(?P<factor>\d+)\*\(')


def expand_brackets(s: str) -> str:
    """Expand all bracketed groups, applying any 'N*(...)' repetition.

    Raises ValueError for unbalanced parentheses or an unparsable factor.
    """
    while True:
        open_pos = s.find('(')
        if open_pos == -1:
            # No brackets left: fully expanded.
            return s
        # Walk forward to the matching close bracket, tracking nesting depth.
        depth = 1
        close_pos = open_pos + 1
        while close_pos < len(s):
            ch = s[close_pos]
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    break
            close_pos += 1
        if depth != 0:
            raise ValueError(f"Unbalanced parenthesis in '{s}'.")
        inner = s[open_pos + 1:close_pos]
        if open_pos == 0 or s[open_pos - 1] != '*':
            # Plain grouping brackets: just drop them.
            s = s[:open_pos] + inner + s[close_pos + 1:]
        else:
            # Locate the first 'number*(' — it owns this bracket group.
            factor_match = BRACKET_RE.search(s)
            if factor_match is None:
                raise ValueError(f"Failed to parse '{s}'.")
            factor = int(factor_match.group('factor'))
            prefix = s[:factor_match.start('factor')]
            # Replace 'N*(inner)' with N comma-joined copies of inner.
            s = prefix + (factor - 1) * (inner + ',') + inner + s[close_pos + 1:]