Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bitstring/utils.py: 45%

175 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:15 +0000

1from __future__ import annotations 

2import itertools 

3import functools 

4import re 

5from typing import Tuple, List, Optional, Pattern, Dict, Union, Match 

6import sys 

7from bitstring.exceptions import Error 

8 

# The native byte ordering of this machine ('little' or 'big'); used for
# the native-endian ('ne') token variants.
byteorder: str = sys.byteorder

# Size of the lru_caches used on the parsing functions below.
CACHE_SIZE = 256

# Token names grouped by the kind of value they initialise.
SIGNED_INTEGER_NAMES: List[str] = ['int', 'se', 'sie', 'intbe', 'intle', 'intne']
UNSIGNED_INTEGER_NAMES: List[str] = ['uint', 'ue', 'uie', 'uintbe', 'uintle', 'uintne', 'bool']
FLOAT_NAMES: List[str] = ['float', 'floatbe', 'floatle', 'floatne', 'bfloatbe', 'bfloatle', 'bfloatne', 'bfloat', 'float8_143', 'float8_152']
STRING_NAMES: List[str] = ['hex', 'oct', 'bin']

# Every name that may start a token in a format string.
INIT_NAMES: List[str] = SIGNED_INTEGER_NAMES + UNSIGNED_INTEGER_NAMES + FLOAT_NAMES + STRING_NAMES + ['bits', 'bytes', 'pad']
# Sort longest first as we want to match them in that order (so floatne before float etc.).
INIT_NAMES.sort(key=len, reverse=True)

# A full token: a known name, optional ':' separator, optional length and
# optional '=value' part, e.g. 'uint:12=31'.
TOKEN_RE: Pattern[str] = re.compile(r'^(?P<name>' + '|'.join(INIT_NAMES) +
                                    r'):?(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)
# Tokens such as 'u32', 'f64=4.5' or 'i6=-3'
SHORT_TOKEN_RE: Pattern[str] = re.compile(r'^(?P<name>[uifboh]):?(?P<len>\d+)?(=(?P<value>.*))?$')
# A token with no name at all; such tokens default to 'bits'.
DEFAULT_BITS: Pattern[str] = re.compile(r'^(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)

# A string followed by optional : then an integer number
STR_INT_RE: Pattern[str] = re.compile(r'^(?P<string>.+?):?(?P<integer>\d*)$')

# A multiplier prefix, e.g. the '3*' in '3*uint:8'.
MULTIPLICATIVE_RE: Pattern[str] = re.compile(r'^(?P<factor>.*)\*(?P<token>.+)')

# Hex, oct or binary literals
LITERAL_RE: Pattern[str] = re.compile(r'^(?P<name>0([xob]))(?P<value>.+)', re.IGNORECASE)

# An endianness indicator followed by one or more struct.pack codes
STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=]){1}(?P<fmt>(?:\d*[bBhHlLqQefd])+)$')
# The same as above, but it doesn't insist on an endianness as it's byteswapping anyway.
BYTESWAP_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])?(?P<fmt>(?:\d*[bBhHlLqQefd])+)$')
# An endianness indicator followed by exactly one struct.pack code
SINGLE_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=]){1}(?P<fmt>(?:[bBhHlLqQefd]))$')

# A number followed by a single character struct.pack code
STRUCT_SPLIT_RE: Pattern[str] = re.compile(r'\d*[bBhHlLqQefd]')

# These replicate the struct.pack codes, mapping each code to the
# equivalent bitstring token.
# Big-endian
REPLACEMENTS_BE: Dict[str, str] = {'b': 'int:8', 'B': 'uint:8',
                                   'h': 'intbe:16', 'H': 'uintbe:16',
                                   'l': 'intbe:32', 'L': 'uintbe:32',
                                   'q': 'intbe:64', 'Q': 'uintbe:64',
                                   'e': 'floatbe:16', 'f': 'floatbe:32', 'd': 'floatbe:64'}
# Little-endian
REPLACEMENTS_LE: Dict[str, str] = {'b': 'int:8', 'B': 'uint:8',
                                   'h': 'intle:16', 'H': 'uintle:16',
                                   'l': 'intle:32', 'L': 'uintle:32',
                                   'q': 'intle:64', 'Q': 'uintle:64',
                                   'e': 'floatle:16', 'f': 'floatle:32', 'd': 'floatle:64'}

# Native-endian
REPLACEMENTS_NE: Dict[str, str] = {'b': 'int:8', 'B': 'uint:8',
                                   'h': 'intne:16', 'H': 'uintne:16',
                                   'l': 'intne:32', 'L': 'uintne:32',
                                   'q': 'intne:64', 'Q': 'uintne:64',
                                   'e': 'floatne:16', 'f': 'floatne:32', 'd': 'floatne:64'}

# Tokens which are always the same length, so it doesn't need to be supplied.
FIXED_LENGTH_TOKENS: Dict[str, int] = {'bool': 1,
                                       'bfloat': 16,
                                       'float8_143': 8,
                                       'float8_152': 8}

72 

def structparser(m: Match[str]) -> List[str]:
    """Convert a matched struct-like format into a list of bitstring tokens.

    The match must have 'endian' and 'fmt' groups (from STRUCT_PACK_RE).
    """
    endianness = m.group('endian')
    # Break the format string into individual codes such as 'q' or '4h'.
    codes = re.findall(STRUCT_SPLIT_RE, m.group('fmt'))
    # Expand any multiplicative factors, so '4h' becomes 'hhhh'.
    expanded = []
    for code in codes:
        if len(code) == 1:
            expanded.append(code)
        else:
            expanded.append(code[-1] * int(code[:-1]))
    fmt = ''.join(expanded)
    # Pick the replacement table that matches the requested endianness.
    if endianness in '@=':
        # Native endianness
        replacements = REPLACEMENTS_NE
    elif endianness == '<':
        replacements = REPLACEMENTS_LE
    else:
        assert endianness == '>'
        replacements = REPLACEMENTS_BE
    return [replacements[c] for c in fmt]

90 

@functools.lru_cache(CACHE_SIZE)
def parse_name_length_token(fmt: str) -> Tuple[str, int]:
    """Parse a single 'name[:]length' token into (name, length-in-bits).

    Accepts long names ('uint:12' or 'uint12'), single-letter short names
    ('u12'), and single struct.pack codes with an explicit endianness
    ('>H', '<q', '=f').

    Raises ValueError if the token can't be parsed, if an exponential-Golomb
    name is used (they have no fixed length), or if a fixed-length token is
    given a contradictory length.
    """
    # First see if it's a struct.pack style code such as '>H'.
    m = SINGLE_STRUCT_PACK_RE.match(fmt)
    if m:
        endian = m.group('endian')
        f = m.group('fmt')
        if endian == '>':
            fmt = REPLACEMENTS_BE[f]
        elif endian == '<':
            fmt = REPLACEMENTS_LE[f]
        else:
            assert endian in '=@'
            fmt = REPLACEMENTS_NE[f]
    # Split into a name and an integer length (0 if no length given).
    m2 = STR_INT_RE.match(fmt)
    if m2:
        name = m2.group('string')
        length_str = m2.group('integer')
        length = 0 if length_str == '' else int(length_str)
    else:
        raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.")
    if name in 'uifboh':
        # Expand a single-letter abbreviation to its full name.
        name = {'u': 'uint',
                'i': 'int',
                'f': 'float',
                'b': 'bin',
                'o': 'oct',
                'h': 'hex'}[name]
    if name in ('se', 'ue', 'sie', 'uie'):
        # length is always an int at this point (0 when absent), so the
        # previous 'if length is not None' guard was always true: every
        # exp-Golomb name reaching this function is rejected. Raise
        # unconditionally to make that explicit.
        raise ValueError(
            f"Exponential-Golomb codes (se/ue/sie/uie) can't have fixed lengths. Length of {length} was given.")

    if name == 'float8_':
        # 'float8_143' parses above as name='float8_', length=143: restore
        # the variant suffix and set the real bit length.
        name += str(length)
        length = 8

    if name in FIXED_LENGTH_TOKENS.keys():
        # These tokens have an implied length; 0 means it wasn't supplied.
        token_length = FIXED_LENGTH_TOKENS[name]
        if length not in [0, token_length]:
            raise ValueError(f"{name} tokens can only be {token_length} bits long, not {length} bits.")
        length = token_length

    # NOTE(review): the original trailing 'if length is None: length = 0'
    # was dead code (length is never None here) and has been removed.
    return name, length

137 

@functools.lru_cache(CACHE_SIZE)
def parse_single_token(token: str) -> Tuple[str, str, Optional[str]]:
    """Split one token into its (name, length, value) string parts.

    Tries the long-name pattern first, then the short single-letter form,
    and finally falls back to treating the token as a plain 'bits' token.
    length and value are returned as strings, or None when absent.
    """
    long_match = TOKEN_RE.match(token)
    if long_match:
        name = long_match.group('name')
        length = long_match.group('len')
        value = long_match.group('value')
    else:
        short_match = SHORT_TOKEN_RE.match(token)
        if short_match:
            # Expand the single-letter abbreviation to its full name.
            abbreviations = {'u': 'uint',
                             'i': 'int',
                             'f': 'float',
                             'b': 'bin',
                             'o': 'oct',
                             'h': 'hex'}
            name = abbreviations[short_match.group('name')]
            length = short_match.group('len')
            value = short_match.group('value')
        else:
            # If you don't specify a 'name' then the default is 'bits':
            default_match = DEFAULT_BITS.match(token)
            if default_match is None:
                raise ValueError(f"Don't understand token '{token}'.")
            name = 'bits'
            length = default_match.group('len')
            value = default_match.group('value')

    if name in FIXED_LENGTH_TOKENS:
        # These tokens have an implied length; reject any contradiction.
        token_length = str(FIXED_LENGTH_TOKENS[name])
        if length is not None and length != token_length:
            raise ValueError(f"{name} tokens can only be {token_length} bits long, not {length} bits.")
        length = token_length

    return name, length, value

173 

174 

@functools.lru_cache(CACHE_SIZE)
def tokenparser(fmt: str, keys: Tuple[str, ...] = ()) -> \
        Tuple[bool, List[Tuple[str, Union[int, str, None], Optional[str]]]]:
    """Divide the format string into tokens and parse them.

    Return stretchy token and list of [initialiser, length, value]
    initialiser is one of: hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc.
    length is None if not known, as is value.

    If the token is in the keyword dictionary (keys) then it counts as a
    special case and isn't messed with.

    tokens must be of the form: [factor*][initialiser][:][length][=value]

    """
    # Remove whitespace
    fmt = ''.join(fmt.split())
    # Expand any brackets.
    fmt = expand_brackets(fmt)
    # Split tokens by ',' and remove whitespace
    # The meta_tokens can either be ordinary single tokens or multiple
    # struct-format token strings.
    meta_tokens = [f.strip() for f in fmt.split(',')]
    return_values: List[Tuple[str, Union[int, str, None], Optional[str]]] = []
    stretchy_token = False
    for meta_token in meta_tokens:
        # See if it has a multiplicative factor
        m = MULTIPLICATIVE_RE.match(meta_token)
        if not m:
            factor = 1
        else:
            factor = int(m.group('factor'))
            meta_token = m.group('token')
        # See if it's a struct-like format
        m = STRUCT_PACK_RE.match(meta_token)
        if m:
            # One struct format expands to several ordinary tokens.
            tokens = structparser(m)
        else:
            tokens = [meta_token]
        ret_vals: List[Tuple[str, Union[str, int, None], Optional[str]]] = []
        for token in tokens:
            if keys and token in keys:
                # Don't bother parsing it, it's a keyword argument
                ret_vals.append((token, None, None))
                continue
            if token == '':
                # e.g. a trailing comma produces an empty token; skip it.
                continue

            # Match literal tokens of the form 0x... 0o... and 0b...
            m = LITERAL_RE.match(token)
            if m:
                ret_vals.append((m.group('name'), None, m.group('value')))
                continue

            name, length, value = parse_single_token(token)

            if name in ('se', 'ue', 'sie', 'uie'):
                # Exponential-Golomb codes are self-delimiting, so an
                # explicit length is an error.
                if length is not None:
                    raise ValueError(f"Exponential-Golomb codes (se/ue/sie/uie) can't have fixed lengths. Length of {length} was given.")
            else:
                if length is None:
                    # A token with no length is 'stretchy': it consumes
                    # whatever bits remain.
                    stretchy_token = True

            if length is not None:
                # Try converting length to int, otherwise check it's a key.
                try:
                    length = int(length)
                    if length < 0:
                        # Jump to the dedicated negative-length error below.
                        raise Error
                    # For the 'bytes' token convert length to bits.
                    if name == 'bytes':
                        length *= 8
                except Error:
                    raise ValueError("Can't read a token with a negative length.")
                except ValueError:
                    # Not an integer: only acceptable if it names a keyword.
                    if not keys or length not in keys:
                        raise ValueError(f"Don't understand length '{length}' of token.")
            ret_vals.append((name, length, value))
        # Repeat the whole group 'factor' times (repeat yields the same
        # list object each time; it's flattened below and never mutated).
        return_values.extend(itertools.repeat(ret_vals, factor))

    # Flatten the list of token groups into a single token list.
    return_values = itertools.chain.from_iterable(return_values)
    return stretchy_token, list(return_values)

257 

258 

def expand_brackets(s: str) -> str:
    """Expand all brackets.

    Plain '(...)' groups are unwrapped; 'n*(...)' groups are replaced by n
    comma-separated copies of their contents. Raises ValueError on
    unbalanced parentheses or an unparsable factor.
    """
    # Finds the first 'number*(' occurrence in the string.
    factor_re = re.compile(r'(?P<factor>\d+)\*\(')
    while True:
        open_pos = s.find('(')
        if open_pos == -1:
            # Nothing left to expand.
            return s
        # Scan forward for the matching close bracket.
        depth = 1  # Number of hanging open brackets
        close_pos = open_pos + 1
        while close_pos < len(s):
            if s[close_pos] == '(':
                depth += 1
            if s[close_pos] == ')':
                depth -= 1
            if not depth:
                break
            close_pos += 1
        if depth:
            raise ValueError(f"Unbalanced parenthesis in '{s}'.")
        inner = s[open_pos + 1:close_pos]
        if open_pos == 0 or s[open_pos - 1] != '*':
            # A plain grouping bracket: just drop the parentheses.
            s = s[:open_pos] + inner + s[close_pos + 1:]
            continue
        # A multiplied group: find the 'number*(' prefix and repeat the
        # contents that many times, comma separated.
        m = factor_re.search(s)
        if m is None:
            raise ValueError(f"Failed to parse '{s}'.")
        factor = int(m.group('factor'))
        s = s[:m.start('factor')] + (factor - 1) * (inner + ',') + inner + s[close_pos + 1:]