Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bitstring/utils.py: 45%

175 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:15 +0000

1from __future__ import annotations 

2import itertools 

3import functools 

4import re 

5from typing import Tuple, List, Optional, Pattern, Dict, Union, Match 

6import sys 

7from bitstring.exceptions import Error 

8 

# The native byte ordering of this machine ('little' or 'big'); used for
# the native-endian ('ne') token variants.
byteorder: str = sys.byteorder

# Size of the lru_caches used on the parsing functions below.
CACHE_SIZE = 256

# Token names grouped by the kind of value they initialise.
SIGNED_INTEGER_NAMES: List[str] = ['int', 'se', 'sie', 'intbe', 'intle', 'intne']
UNSIGNED_INTEGER_NAMES: List[str] = ['uint', 'ue', 'uie', 'uintbe', 'uintle', 'uintne', 'bool']
FLOAT_NAMES: List[str] = ['float', 'floatbe', 'floatle', 'floatne', 'bfloatbe', 'bfloatle', 'bfloatne', 'bfloat', 'float8_143', 'float8_152']
STRING_NAMES: List[str] = ['hex', 'oct', 'bin']

# Every name that may start a token in a format string.
INIT_NAMES: List[str] = SIGNED_INTEGER_NAMES + UNSIGNED_INTEGER_NAMES + FLOAT_NAMES + STRING_NAMES + ['bits', 'bytes', 'pad']
# Sort longest first as we want to match them in that order (so floatne before float etc.).
INIT_NAMES.sort(key=len, reverse=True)

# A full token: a known name, optional ':' separator, optional length and
# optional '=value' part, e.g. 'uint:12=31'.
TOKEN_RE: Pattern[str] = re.compile(r'^(?P<name>' + '|'.join(INIT_NAMES) +
                                    r'):?(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)
# Tokens such as 'u32', 'f64=4.5' or 'i6=-3'
SHORT_TOKEN_RE: Pattern[str] = re.compile(r'^(?P<name>[uifboh]):?(?P<len>\d+)?(=(?P<value>.*))?$')
# A token with no name at all; such tokens default to 'bits'.
DEFAULT_BITS: Pattern[str] = re.compile(r'^(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)

# A string followed by optional : then an integer number
STR_INT_RE: Pattern[str] = re.compile(r'^(?P<string>.+?):?(?P<integer>\d*)$')

# A multiplier prefix, e.g. the '3*' in '3*uint:8'.
MULTIPLICATIVE_RE: Pattern[str] = re.compile(r'^(?P<factor>.*)\*(?P<token>.+)')

# Hex, oct or binary literals
LITERAL_RE: Pattern[str] = re.compile(r'^(?P<name>0([xob]))(?P<value>.+)', re.IGNORECASE)

# An endianness indicator followed by one or more struct.pack codes
STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=]){1}(?P<fmt>(?:\d*[bBhHlLqQefd])+)$')
# The same as above, but it doesn't insist on an endianness as it's byteswapping anyway.
BYTESWAP_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])?(?P<fmt>(?:\d*[bBhHlLqQefd])+)$')
# An endianness indicator followed by exactly one struct.pack code
SINGLE_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=]){1}(?P<fmt>(?:[bBhHlLqQefd]))$')

# A number followed by a single character struct.pack code
STRUCT_SPLIT_RE: Pattern[str] = re.compile(r'\d*[bBhHlLqQefd]')

# These replicate the struct.pack codes, mapping each code to the
# equivalent bitstring token.
# Big-endian
REPLACEMENTS_BE: Dict[str, str] = {'b': 'int:8', 'B': 'uint:8',
                                   'h': 'intbe:16', 'H': 'uintbe:16',
                                   'l': 'intbe:32', 'L': 'uintbe:32',
                                   'q': 'intbe:64', 'Q': 'uintbe:64',
                                   'e': 'floatbe:16', 'f': 'floatbe:32', 'd': 'floatbe:64'}
# Little-endian
REPLACEMENTS_LE: Dict[str, str] = {'b': 'int:8', 'B': 'uint:8',
                                   'h': 'intle:16', 'H': 'uintle:16',
                                   'l': 'intle:32', 'L': 'uintle:32',
                                   'q': 'intle:64', 'Q': 'uintle:64',
                                   'e': 'floatle:16', 'f': 'floatle:32', 'd': 'floatle:64'}

# Native-endian
REPLACEMENTS_NE: Dict[str, str] = {'b': 'int:8', 'B': 'uint:8',
                                   'h': 'intne:16', 'H': 'uintne:16',
                                   'l': 'intne:32', 'L': 'uintne:32',
                                   'q': 'intne:64', 'Q': 'uintne:64',
                                   'e': 'floatne:16', 'f': 'floatne:32', 'd': 'floatne:64'}

# Tokens which are always the same length, so it doesn't need to be supplied.
FIXED_LENGTH_TOKENS: Dict[str, int] = {'bool': 1,
                                       'bfloat': 16,
                                       'float8_143': 8,
                                       'float8_152': 8}

72 

def structparser(m: Match[str]) -> List[str]:
    """Convert a matched struct-like format into a list of bitstring tokens.

    The match must have 'endian' and 'fmt' groups (from STRUCT_PACK_RE).
    """
    endianness = m.group('endian')
    # Break the format string into individual codes such as 'q' or '4h'.
    codes = re.findall(STRUCT_SPLIT_RE, m.group('fmt'))
    # Expand any multiplicative factors, so '4h' becomes 'hhhh'.
    expanded = []
    for code in codes:
        if len(code) == 1:
            expanded.append(code)
        else:
            expanded.append(code[-1] * int(code[:-1]))
    fmt = ''.join(expanded)
    # Pick the replacement table that matches the requested endianness.
    if endianness in '@=':
        # Native endianness
        replacements = REPLACEMENTS_NE
    elif endianness == '<':
        replacements = REPLACEMENTS_LE
    else:
        assert endianness == '>'
        replacements = REPLACEMENTS_BE
    return [replacements[c] for c in fmt]

90 

@functools.lru_cache(CACHE_SIZE)
def parse_name_length_token(fmt: str) -> Tuple[str, int]:
    """Parse a single 'name[:]length' token into (name, length-in-bits).

    Accepts long names ('uint:12' or 'uint12'), single-letter short names
    ('u12'), and single struct.pack codes with an explicit endianness
    ('>H', '<q', '=f').

    Raises ValueError if the token can't be parsed, if an exponential-Golomb
    name is used (they have no fixed length), or if a fixed-length token is
    given a contradictory length.
    """
    # First see if it's a struct.pack style code such as '>H'.
    m = SINGLE_STRUCT_PACK_RE.match(fmt)
    if m:
        endian = m.group('endian')
        f = m.group('fmt')
        if endian == '>':
            fmt = REPLACEMENTS_BE[f]
        elif endian == '<':
            fmt = REPLACEMENTS_LE[f]
        else:
            assert endian in '=@'
            fmt = REPLACEMENTS_NE[f]
    # Split into a name and an integer length (0 if no length given).
    m2 = STR_INT_RE.match(fmt)
    if m2:
        name = m2.group('string')
        length_str = m2.group('integer')
        length = 0 if length_str == '' else int(length_str)
    else:
        raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.")
    if name in 'uifboh':
        # Expand a single-letter abbreviation to its full name.
        name = {'u': 'uint',
                'i': 'int',
                'f': 'float',
                'b': 'bin',
                'o': 'oct',
                'h': 'hex'}[name]
    if name in ('se', 'ue', 'sie', 'uie'):
        # length is always an int at this point (0 when absent), so the
        # previous 'if length is not None' guard was always true: every
        # exp-Golomb name reaching this function is rejected. Raise
        # unconditionally to make that explicit.
        raise ValueError(
            f"Exponential-Golomb codes (se/ue/sie/uie) can't have fixed lengths. Length of {length} was given.")

    if name == 'float8_':
        # 'float8_143' parses above as name='float8_', length=143: restore
        # the variant suffix and set the real bit length.
        name += str(length)
        length = 8

    if name in FIXED_LENGTH_TOKENS.keys():
        # These tokens have an implied length; 0 means it wasn't supplied.
        token_length = FIXED_LENGTH_TOKENS[name]
        if length not in [0, token_length]:
            raise ValueError(f"{name} tokens can only be {token_length} bits long, not {length} bits.")
        length = token_length

    # NOTE(review): the original trailing 'if length is None: length = 0'
    # was dead code (length is never None here) and has been removed.
    return name, length

137 

@functools.lru_cache(CACHE_SIZE)
def parse_single_token(token: str) -> Tuple[str, str, Optional[str]]:
    """Split one token into its (name, length, value) string parts.

    Tries the long-name pattern first, then the short single-letter form,
    and finally falls back to treating the token as a plain 'bits' token.
    length and value are returned as strings, or None when absent.
    """
    long_match = TOKEN_RE.match(token)
    if long_match:
        name = long_match.group('name')
        length = long_match.group('len')
        value = long_match.group('value')
    else:
        short_match = SHORT_TOKEN_RE.match(token)
        if short_match:
            # Expand the single-letter abbreviation to its full name.
            abbreviations = {'u': 'uint',
                             'i': 'int',
                             'f': 'float',
                             'b': 'bin',
                             'o': 'oct',
                             'h': 'hex'}
            name = abbreviations[short_match.group('name')]
            length = short_match.group('len')
            value = short_match.group('value')
        else:
            # If you don't specify a 'name' then the default is 'bits':
            default_match = DEFAULT_BITS.match(token)
            if default_match is None:
                raise ValueError(f"Don't understand token '{token}'.")
            name = 'bits'
            length = default_match.group('len')
            value = default_match.group('value')

    if name in FIXED_LENGTH_TOKENS:
        # These tokens have an implied length; reject any contradiction.
        token_length = str(FIXED_LENGTH_TOKENS[name])
        if length is not None and length != token_length:
            raise ValueError(f"{name} tokens can only be {token_length} bits long, not {length} bits.")
        length = token_length

    return name, length, value

173 

174 

@functools.lru_cache(CACHE_SIZE)
def tokenparser(fmt: str, keys: Tuple[str, ...] = ()) -> \
        Tuple[bool, List[Tuple[str, Union[int, str, None], Optional[str]]]]:
    """Divide the format string into tokens and parse them.

    Return stretchy token and list of [initialiser, length, value]
    initialiser is one of: hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc.
    length is None if not known, as is value.

    If the token is in the keyword dictionary (keys) then it counts as a
    special case and isn't messed with.

    tokens must be of the form: [factor*][initialiser][:][length][=value]

    """
    # Remove whitespace
    fmt = ''.join(fmt.split())
    # Expand any brackets.
    fmt = expand_brackets(fmt)
    # Split tokens by ',' and remove whitespace
    # The meta_tokens can either be ordinary single tokens or multiple
    # struct-format token strings.
    meta_tokens = [f.strip() for f in fmt.split(',')]
    return_values: List[Tuple[str, Union[int, str, None], Optional[str]]] = []
    stretchy_token = False
    for meta_token in meta_tokens:
        # See if it has a multiplicative factor
        m = MULTIPLICATIVE_RE.match(meta_token)
        if not m:
            factor = 1
        else:
            factor = int(m.group('factor'))
            meta_token = m.group('token')
        # See if it's a struct-like format
        m = STRUCT_PACK_RE.match(meta_token)
        if m:
            # One struct format expands to several ordinary tokens.
            tokens = structparser(m)
        else:
            tokens = [meta_token]
        ret_vals: List[Tuple[str, Union[str, int, None], Optional[str]]] = []
        for token in tokens:
            if keys and token in keys:
                # Don't bother parsing it, it's a keyword argument
                ret_vals.append((token, None, None))
                continue
            if token == '':
                # e.g. a trailing comma produces an empty token; skip it.
                continue

            # Match literal tokens of the form 0x... 0o... and 0b...
            m = LITERAL_RE.match(token)
            if m:
                ret_vals.append((m.group('name'), None, m.group('value')))
                continue

            name, length, value = parse_single_token(token)

            if name in ('se', 'ue', 'sie', 'uie'):
                # Exponential-Golomb codes are self-delimiting, so an
                # explicit length is an error.
                if length is not None:
                    raise ValueError(f"Exponential-Golomb codes (se/ue/sie/uie) can't have fixed lengths. Length of {length} was given.")
            else:
                if length is None:
                    # A token with no length is 'stretchy': it consumes
                    # whatever bits remain.
                    stretchy_token = True

            if length is not None:
                # Try converting length to int, otherwise check it's a key.
                try:
                    length = int(length)
                    if length < 0:
                        # Jump to the dedicated negative-length error below.
                        raise Error
                    # For the 'bytes' token convert length to bits.
                    if name == 'bytes':
                        length *= 8
                except Error:
                    raise ValueError("Can't read a token with a negative length.")
                except ValueError:
                    # Not an integer: only acceptable if it names a keyword.
                    if not keys or length not in keys:
                        raise ValueError(f"Don't understand length '{length}' of token.")
            ret_vals.append((name, length, value))
        # Repeat the whole group 'factor' times (repeat yields the same
        # list object each time; it's flattened below and never mutated).
        return_values.extend(itertools.repeat(ret_vals, factor))

    # Flatten the list of token groups into a single token list.
    return_values = itertools.chain.from_iterable(return_values)
    return stretchy_token, list(return_values)

257 

258 

def expand_brackets(s: str) -> str:
    """Expand all brackets.

    Plain '(...)' groups are unwrapped; 'n*(...)' groups are replaced by n
    comma-separated copies of their contents. Raises ValueError on
    unbalanced parentheses or an unparsable factor.
    """
    # Finds the first 'number*(' occurrence in the string.
    factor_re = re.compile(r'(?P<factor>\d+)\*\(')
    while True:
        open_pos = s.find('(')
        if open_pos == -1:
            # Nothing left to expand.
            return s
        # Scan forward for the matching close bracket.
        depth = 1  # Number of hanging open brackets
        close_pos = open_pos + 1
        while close_pos < len(s):
            if s[close_pos] == '(':
                depth += 1
            if s[close_pos] == ')':
                depth -= 1
            if not depth:
                break
            close_pos += 1
        if depth:
            raise ValueError(f"Unbalanced parenthesis in '{s}'.")
        inner = s[open_pos + 1:close_pos]
        if open_pos == 0 or s[open_pos - 1] != '*':
            # A plain grouping bracket: just drop the parentheses.
            s = s[:open_pos] + inner + s[close_pos + 1:]
            continue
        # A multiplied group: find the 'number*(' prefix and repeat the
        # contents that many times, comma separated.
        m = factor_re.search(s)
        if m is None:
            raise ValueError(f"Failed to parse '{s}'.")
        factor = int(m.group('factor'))
        s = s[:m.start('factor')] + (factor - 1) * (inner + ',') + inner + s[close_pos + 1:]