Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/bitstring/utils.py: 26%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import functools
4import re
5from typing import Tuple, List, Optional, Pattern, Dict, Union, Match
# A token name followed by an optional ':' then an integer length, e.g. 'uint:12' or 'uint12'.
NAME_INT_RE: Pattern[str] = re.compile(r'^([a-zA-Z][a-zA-Z0-9_]*?):?(\d*)$')

# A token name followed by an optional ':' then an arbitrary keyword, e.g. 'uint:width'.
NAME_KWARG_RE: Pattern[str] = re.compile(r'^([a-zA-Z][a-zA-Z0-9_]*?):?([a-zA-Z0-9_]+)$')

# Maximum number of entries kept by each functools.lru_cache in this module.
CACHE_SIZE = 256

# An optional length followed by an optional '=value'.
# NOTE(review): not referenced by any function in this module — presumably used
# elsewhere in the package; confirm before removing.
DEFAULT_BITS: Pattern[str] = re.compile(r'^(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)

# A multiplicative factor before a token, e.g. '3*uint:8'.
MULTIPLICATIVE_RE: Pattern[str] = re.compile(r'^(?P<factor>.*)\*(?P<token>.+)')

# Hex, oct or binary literals, e.g. '0xff', '0o17', '0b101'.
LITERAL_RE: Pattern[str] = re.compile(r'^(?P<name>0([xob]))(?P<value>.+)', re.IGNORECASE)

# An endianness indicator followed by one or more struct.pack codes, e.g. '>2hql'.
STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])(?P<fmt>(?:\d*[bBhHlLiIqQefd])+)$')
# The same as above, but it doesn't insist on an endianness as it's byteswapping anyway.
BYTESWAP_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])?(?P<fmt>(?:\d*[bBhHlLiIqQefd])+)$')
# An endianness indicator followed by exactly one struct.pack code.
SINGLE_STRUCT_PACK_RE: Pattern[str] = re.compile(r'^(?P<endian>[<>@=])(?P<fmt>[bBhHlLiIqQefd])$')

# An optional repeat count followed by a single character struct.pack code, e.g. '4h'.
STRUCT_SPLIT_RE: Pattern[str] = re.compile(r'\d*[bBhHlLiIqQefd]')

# These replicate the struct.pack codes. As in struct, both 'l'/'L' and
# 'i'/'I' map to 32-bit types.
# Big-endian
REPLACEMENTS_BE: Dict[str, str] = {'b': 'int8', 'B': 'uint8',
                                   'h': 'intbe16', 'H': 'uintbe16',
                                   'l': 'intbe32', 'L': 'uintbe32',
                                   'i': 'intbe32', 'I': 'uintbe32',
                                   'q': 'intbe64', 'Q': 'uintbe64',
                                   'e': 'floatbe16', 'f': 'floatbe32', 'd': 'floatbe64'}
# Little-endian
REPLACEMENTS_LE: Dict[str, str] = {'b': 'int8', 'B': 'uint8',
                                   'h': 'intle16', 'H': 'uintle16',
                                   'l': 'intle32', 'L': 'uintle32',
                                   'i': 'intle32', 'I': 'uintle32',
                                   'q': 'intle64', 'Q': 'uintle64',
                                   'e': 'floatle16', 'f': 'floatle32', 'd': 'floatle64'}
# Native-endian
REPLACEMENTS_NE: Dict[str, str] = {'b': 'int8', 'B': 'uint8',
                                   'h': 'intne16', 'H': 'uintne16',
                                   'l': 'intne32', 'L': 'uintne32',
                                   'i': 'intne32', 'I': 'uintne32',
                                   'q': 'intne64', 'Q': 'uintne64',
                                   'e': 'floatne16', 'f': 'floatne32', 'd': 'floatne64'}

# Size in bytes of each struct.pack code.
PACK_CODE_SIZE: Dict[str, int] = {'b': 1, 'B': 1, 'h': 2, 'H': 2, 'l': 4, 'L': 4, 'i': 4, 'I': 4,
                                  'q': 8, 'Q': 8, 'e': 2, 'f': 4, 'd': 8}
def structparser(m: Match[str]) -> List[str]:
    """Convert a matched struct.pack-style format into a list of token names.

    `m` must provide an 'endian' group (one of '<>@=') and a 'fmt' group
    containing struct codes with optional repeat counts, e.g. '4h'.
    """
    endianness = m.group('endian')
    # Break the format into pieces such as 'q', '4h', ...
    pieces = STRUCT_SPLIT_RE.findall(m.group('fmt'))
    # Expand any repeat counts: '4h' -> 'hhhh'.
    expanded = []
    for piece in pieces:
        if len(piece) == 1:
            expanded.append(piece)
        else:
            expanded.append(piece[-1] * int(piece[:-1]))
    codes = ''.join(expanded)
    # Pick the replacement table matching the requested endianness.
    if endianness in '@=':
        replacements = REPLACEMENTS_NE  # Native endianness
    elif endianness == '<':
        replacements = REPLACEMENTS_LE
    else:
        assert endianness == '>'
        replacements = REPLACEMENTS_BE
    return [replacements[code] for code in codes]
@functools.lru_cache(CACHE_SIZE)
def parse_name_length_token(fmt: str, **kwargs) -> Tuple[str, Optional[int]]:
    """Parse a single 'name[:]length' token into a (name, length) pair.

    The length may be given directly as an integer ('uint:12'), be absent
    (length is returned as None), or be a keyword ('uint:width') whose value
    is looked up in **kwargs.

    Raises ValueError if the token can't be parsed or the keyword is missing.
    """
    # Name with an optional integer length, e.g. 'uint:12', 'uint12' or 'bool'.
    if m_int := NAME_INT_RE.match(fmt):
        name = m_int.group(1)
        length_str = m_int.group(2)
        return name, (None if length_str == '' else int(length_str))
    # Name with a keyword length, e.g. 'uint:width' -- look the keyword up in kwargs.
    if m_kwarg := NAME_KWARG_RE.match(fmt):
        name = m_kwarg.group(1)
        try:
            length_str = kwargs[m_kwarg.group(2)]
        except KeyError:
            # 'from None' suppresses the internal KeyError so the caller sees a
            # single clear parse error instead of a chained traceback (PEP 409).
            raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.") from None
        return name, int(length_str)
    raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.")
@functools.lru_cache(CACHE_SIZE)
def parse_single_struct_token(fmt: str) -> Optional[Tuple[str, Optional[int]]]:
    """Convert a single struct.pack token like '>H' to a (name, length) pair.

    Returns None if `fmt` isn't an endianness indicator plus one struct code.
    """
    m = SINGLE_STRUCT_PACK_RE.match(fmt)
    if m is None:
        return None
    code = m.group('fmt')
    endianness = m.group('endian')
    # Translate the struct code via the table for the given endianness.
    if endianness == '>':
        token = REPLACEMENTS_BE[code]
    elif endianness == '<':
        token = REPLACEMENTS_LE[code]
    else:
        assert endianness in '=@'
        token = REPLACEMENTS_NE[code]
    return parse_name_length_token(token)
@functools.lru_cache(CACHE_SIZE)
def parse_single_token(token: str) -> Tuple[str, str, Optional[str]]:
    """Split one token into (name, length, value) strings.

    The length is left as a string (it may be a keyword rather than digits)
    and is None when absent, as is the value.
    """
    # Separate off an optional '=value' suffix first.
    name_part, equals, value_part = token.partition('=')
    value = value_part if equals else None
    if m_int := NAME_INT_RE.match(name_part):
        # Name with an optional integer length.
        name = m_int.group(1)
        length = m_int.group(2) or None
    elif m_kwarg := NAME_KWARG_RE.match(name_part):
        # Name then a keyword for a length.
        name = m_kwarg.group(1)
        length = m_kwarg.group(2)
    else:
        # If you don't specify a 'name' then the default is 'bits'.
        name = 'bits'
        length = name_part
    return name, length, value
@functools.lru_cache(CACHE_SIZE)
def preprocess_tokens(fmt: str) -> List[str]:
    """Strip whitespace, expand brackets and multiplicative factors, and
    return the format string as a flat list of single-token strings."""
    # Remove all whitespace, then expand any bracketed groups.
    fmt = expand_brackets(''.join(fmt.split()))
    # Comma-separated pieces are either ordinary single tokens or
    # multi-code struct-format strings.
    final_tokens: List[str] = []
    for meta_token in (piece.strip() for piece in fmt.split(',')):
        if not meta_token:
            continue
        # Peel off a multiplicative factor if present, e.g. '3*uint:8'.
        factor = 1
        if factor_match := MULTIPLICATIVE_RE.match(meta_token):
            factor = int(factor_match.group('factor'))
            meta_token = factor_match.group('token')
        # A struct-format string expands to several sub-tokens;
        # anything else is a single token.
        struct_match = STRUCT_PACK_RE.match(meta_token)
        sub_tokens = structparser(struct_match) if struct_match else [meta_token]
        final_tokens.extend(sub_tokens * factor)
    return final_tokens
@functools.lru_cache(CACHE_SIZE)
def tokenparser(fmt: str, keys: Tuple[str, ...] = ()) -> \
        Tuple[bool, List[Tuple[str, Union[int, str, None], Optional[str]]]]:
    """Divide the format string into tokens and parse them.

    Return stretchy token and list of [initialiser, length, value]
    initialiser is one of: hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc.
    length is None if not known, as is value.

    If the token is in the keyword dictionary (keys) then it counts as a
    special case and isn't messed with.

    tokens must be of the form: [factor*][initialiser][:][length][=value]
    """
    parsed: List[Tuple[str, Union[str, int, None], Optional[str]]] = []
    has_stretchy_token = False
    for token in preprocess_tokens(fmt):
        if keys and token in keys:
            # It's a keyword argument -- pass it through unparsed.
            parsed.append((token, None, None))
            continue
        if not token:
            continue
        # Literal tokens of the form 0x..., 0o... and 0b...
        literal_match = LITERAL_RE.match(token)
        if literal_match:
            parsed.append((literal_match.group('name'), None, literal_match.group('value')))
            continue
        name, length, value = parse_single_token(token)
        if length is None:
            # No length given: the token stretches to fill available bits.
            has_stretchy_token = True
        else:
            # Try converting length to int, otherwise check it's a key.
            try:
                length = int(length)
            except ValueError:
                if not keys or length not in keys:
                    raise ValueError(f"Don't understand length '{length}' of token.")
        parsed.append((name, length, value))
    return has_stretchy_token, parsed
# Looks for the first 'number*(' in a format string.
BRACKET_RE = re.compile(r'(?P<factor>\d+)\*\(')


def expand_brackets(s: str) -> str:
    """Expand all bracketed groups, e.g. '2*(a,b)' -> 'a,b,a,b'.

    Raises ValueError on unbalanced or otherwise unparseable brackets.
    """
    while (start := s.find('(')) != -1:
        # Scan forward for the matching close bracket.
        depth = 1  # Number of hanging open brackets
        pos = start + 1
        while pos < len(s):
            depth += (s[pos] == '(') - (s[pos] == ')')
            if depth == 0:
                break
            pos += 1
        if depth != 0:
            raise ValueError(f"Unbalanced parenthesis in '{s}'.")
        inner = s[start + 1:pos]
        if start == 0 or s[start - 1] != '*':
            # Plain (non-multiplied) brackets: just remove them.
            s = s[:start] + inner + s[pos + 1:]
            continue
        # Multiplied brackets: find the first 'number*(' and replicate
        # the bracket contents that many times, comma-separated.
        m = BRACKET_RE.search(s)
        if m is None:
            raise ValueError(f"Failed to parse '{s}'.")
        factor = int(m.group('factor'))
        s = s[:m.start('factor')] + (factor - 1) * (inner + ',') + inner + s[pos + 1:]
    return s