Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/bitstring/utils.py: 26%

1from __future__ import annotations

3import functools

4import re

5from typing import Tuple, List, Optional, Pattern, Dict, Union, Match

# Token name, optional ':' separator, then an optional run of digits.
NAME_INT_RE: Pattern[str] = re.compile(r'^([a-zA-Z][a-zA-Z0-9_]*?):?(\d*)$')

# Token name, optional ':' separator, then an arbitrary keyword.
NAME_KWARG_RE: Pattern[str] = re.compile(r'^([a-zA-Z][a-zA-Z0-9_]*?):?([a-zA-Z0-9_]+)$')

# maxsize passed to functools.lru_cache by the parsing functions in this module.
CACHE_SIZE = 256

# Optional length part, optionally followed by '=' and a value.
DEFAULT_BITS: Pattern[str] = re.compile(
    r'^(?P<len>[^=]+)?(=(?P<value>.*))?$',
    flags=re.IGNORECASE,
)

# 'factor*token' - everything before the last usable '*' is the factor.
MULTIPLICATIVE_RE: Pattern[str] = re.compile(r'^(?P<factor>.*)\*(?P<token>.+)')

# Hex, octal or binary literals (0x..., 0o..., 0b...).
LITERAL_RE: Pattern[str] = re.compile(
    r'^(?P<name>0([xob]))(?P<value>.+)',
    flags=re.IGNORECASE,
)

# A mandatory endianness indicator followed by one or more struct.pack codes.
STRUCT_PACK_RE: Pattern[str] = re.compile(
    r'^(?P<endian>[<>@=]){1}(?P<fmt>(?:\d*[bBhHlLqQefd])+)$'
)

# As STRUCT_PACK_RE, but the endianness indicator is optional
# (it is used for byte-swapping, where the endianness does not matter).
BYTESWAP_STRUCT_PACK_RE: Pattern[str] = re.compile(
    r'^(?P<endian>[<>@=])?(?P<fmt>(?:\d*[bBhHlLqQefd])+)$'
)

# A mandatory endianness indicator followed by exactly one struct.pack code.
SINGLE_STRUCT_PACK_RE: Pattern[str] = re.compile(
    r'^(?P<endian>[<>@=]){1}(?P<fmt>[bBhHlLqQefd])$'
)

# An optional repeat count followed by a single struct.pack code character.
STRUCT_SPLIT_RE: Pattern[str] = re.compile(r'\d*[bBhHlLqQefd]')

# The tables below map each struct.pack code to the equivalent token name.

# Big-endian
REPLACEMENTS_BE: Dict[str, str] = {
    'b': 'int8',
    'B': 'uint8',
    'h': 'intbe16',
    'H': 'uintbe16',
    'l': 'intbe32',
    'L': 'uintbe32',
    'q': 'intbe64',
    'Q': 'uintbe64',
    'e': 'floatbe16',
    'f': 'floatbe32',
    'd': 'floatbe64',
}

# Little-endian
REPLACEMENTS_LE: Dict[str, str] = {
    'b': 'int8',
    'B': 'uint8',
    'h': 'intle16',
    'H': 'uintle16',
    'l': 'intle32',
    'L': 'uintle32',
    'q': 'intle64',
    'Q': 'uintle64',
    'e': 'floatle16',
    'f': 'floatle32',
    'd': 'floatle64',
}

# Native-endian
REPLACEMENTS_NE: Dict[str, str] = {
    'b': 'int8',
    'B': 'uint8',
    'h': 'intne16',
    'H': 'uintne16',
    'l': 'intne32',
    'L': 'uintne32',
    'q': 'intne64',
    'Q': 'uintne64',
    'e': 'floatne16',
    'f': 'floatne32',
    'd': 'floatne64',
}

# Size in bytes of each pack code.
PACK_CODE_SIZE: Dict[str, int] = {
    'b': 1,
    'B': 1,
    'h': 2,
    'H': 2,
    'l': 4,
    'L': 4,
    'q': 8,
    'Q': 8,
    'e': 2,
    'f': 4,
    'd': 8,
}

def structparser(m: Match[str]) -> List[str]:
    """Turn a matched struct-style format string into a list of token names.

    ``m`` must expose the named groups ``endian`` and ``fmt``. Each pack code
    in ``fmt`` (after expanding repeat counts, e.g. ``4h`` -> ``hhhh``) is
    looked up in the replacement table selected by the endianness character.
    """
    endian = m.group('endian')

    # Break the format into pieces such as 'q' or '4h', then expand counts.
    expanded = []
    for piece in STRUCT_SPLIT_RE.findall(m.group('fmt')):
        count, code = piece[:-1], piece[-1]
        expanded.append(code * int(count) if count else code)
    fmt = ''.join(expanded)

    if endian in '@=':
        # Native endianness
        table = REPLACEMENTS_NE
    elif endian == '<':
        table = REPLACEMENTS_LE
    else:
        assert endian == '>'
        table = REPLACEMENTS_BE
    return [table[code] for code in fmt]

@functools.lru_cache(CACHE_SIZE)
def parse_name_length_token(fmt: str, **kwargs) -> Tuple[str, Optional[int]]:
    """Parse a single ``name[:]length`` token into ``(name, length)``.

    The length is either written as digits in the token itself, or given as
    a keyword whose value is looked up in ``kwargs``. A token with no length
    at all yields ``None`` for the length.

    Raises:
        ValueError: if the token matches neither form, or the keyword it
            names is not present in ``kwargs``.
    """
    # First choice: the length (if any) is spelled out as digits.
    numeric = NAME_INT_RE.match(fmt)
    if numeric is not None:
        name, digits = numeric.groups()
        return name, (int(digits) if digits else None)

    # Otherwise the length part may name an entry in kwargs.
    keyword = NAME_KWARG_RE.match(fmt)
    if keyword is None:
        raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.")
    name, key = keyword.groups()
    try:
        raw_length = kwargs[key]
    except KeyError:
        raise ValueError(f"Can't parse 'name[:]length' token '{fmt}'.")
    return name, int(raw_length)

@functools.lru_cache(CACHE_SIZE)
def parse_single_struct_token(fmt: str) -> Optional[Tuple[str, Optional[int]]]:
    """Parse a token made of an endianness character and one pack code.

    Returns the ``(name, length)`` pair obtained by translating the pack code
    to its token name and parsing that, or ``None`` when ``fmt`` is not a
    single struct-style token.
    """
    matched = SINGLE_STRUCT_PACK_RE.match(fmt)
    if matched is None:
        return None

    endian, code = matched.group('endian', 'fmt')
    if endian == '>':
        table = REPLACEMENTS_BE
    elif endian == '<':
        table = REPLACEMENTS_LE
    else:
        assert endian in '=@'
        table = REPLACEMENTS_NE
    return parse_name_length_token(table[code])

112

@functools.lru_cache(CACHE_SIZE)
def parse_single_token(token: str) -> Tuple[str, Optional[str], Optional[str]]:
    """Split one ``name[:]length[=value]`` token into its three parts.

    Returns:
        A ``(name, length, value)`` tuple.

        * ``name`` - the token name; ``'bits'`` when the token does not look
          like ``name[:]length`` at all.
        * ``length`` - the length part as a *string* (digits or a keyword),
          or ``None`` when the token carries no length. It is not converted
          to ``int`` here.
        * ``value`` - the text after the first ``'='``, or ``None`` when the
          token contains no ``'='``.
    """
    # Everything after the first '=' is the value; what precedes it is the
    # name/length part that the regexes below are applied to.
    token, separator, after_equals = token.partition('=')
    value = after_equals if separator else None

    if m2 := NAME_INT_RE.match(token):
        name, length_str = m2.groups()
        # An empty digit group means no length was given.
        length = length_str or None
    elif m3 := NAME_KWARG_RE.match(token):
        # name then a keyword for a length
        name, length = m3.groups()
    else:
        # If you don't specify a 'name' then the default is 'bits'
        name, length = 'bits', token
    return name, length, value

134

@functools.lru_cache(CACHE_SIZE)
def preprocess_tokens(fmt: str) -> List[str]:
    """Flatten a format string into a list of individual token strings.

    All whitespace is removed, brackets are expanded, the result is split on
    commas, ``factor*token`` repetitions are unrolled and struct-style tokens
    are replaced by their equivalent token names. Empty tokens are dropped.

    The result is memoised by ``lru_cache``, so the returned list object is
    shared between calls with the same ``fmt`` and should not be mutated.
    """
    without_whitespace = ''.join(fmt.split())
    expanded = expand_brackets(without_whitespace)

    final_tokens: List[str] = []
    # Each comma-separated piece is either an ordinary single token or a
    # struct-format string that stands for several tokens.
    for piece in expanded.split(','):
        piece = piece.strip()
        if not piece:
            continue

        # Peel off a multiplicative factor if one is present.
        repeat = 1
        multiplied = MULTIPLICATIVE_RE.match(piece)
        if multiplied is not None:
            repeat = int(multiplied.group('factor'))
            piece = multiplied.group('token')

        # Struct-like formats become several sub-tokens; anything else is
        # kept as a single token.
        struct_match = STRUCT_PACK_RE.match(piece)
        if struct_match is not None:
            sub_tokens = structparser(struct_match)
        else:
            sub_tokens = [piece]

        final_tokens.extend(sub_tokens * repeat)
    return final_tokens

160

161

@functools.lru_cache(CACHE_SIZE)
def tokenparser(fmt: str, keys: Tuple[str, ...] = ()) -> \
        Tuple[bool, List[Tuple[str, Union[int, str, None], Optional[str]]]]:
    """Split a format string into tokens and parse each of them.

    Tokens have the form ``[factor*][initialiser][:][length][=value]``.

    Returns a pair ``(stretchy_token, parsed)`` where ``stretchy_token`` is
    True if any parsed token had no length, and ``parsed`` is a list of
    ``(initialiser, length, value)`` tuples. The initialiser is a name such
    as hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc.; ``length`` and
    ``value`` are None when not given.

    A token that appears in ``keys`` is treated as a keyword argument and is
    passed through unparsed as ``(token, None, None)``. A length that is not
    an integer must itself be one of ``keys``, otherwise ValueError is raised.
    """
    stretchy_token = False
    ret_vals: List[Tuple[str, Union[str, int, None], Optional[str]]] = []

    for token in preprocess_tokens(fmt):
        if keys and token in keys:
            # A keyword argument - leave it untouched.
            ret_vals.append((token, None, None))
            continue
        if not token:
            continue

        # Literal tokens of the form 0x... 0o... and 0b...
        literal = LITERAL_RE.match(token)
        if literal is not None:
            ret_vals.append((literal.group('name'), None, literal.group('value')))
            continue

        name, length, value = parse_single_token(token)
        if length is None:
            stretchy_token = True
        else:
            # The length is either an integer or the name of a key.
            try:
                length = int(length)
            except ValueError:
                if not (keys and length in keys):
                    raise ValueError(f"Don't understand length '{length}' of token.")
        ret_vals.append((name, length, value))

    return stretchy_token, ret_vals

203

204

# A decimal repeat factor immediately followed by '*('.
BRACKET_RE: Pattern[str] = re.compile(r'(?P<factor>\d+)\*\(')


def expand_brackets(s: str) -> str:
    """Expand all brackets in a format string.

    A plain group ``(a,b)`` is replaced by its contents. A multiplied group
    ``N*(a,b)`` is replaced by ``N`` comma-separated copies of its contents;
    a factor of zero therefore produces no copies. Brackets are processed
    from the left, so nested groups are expanded on later passes.

    Args:
        s: The format string, expected to have had its whitespace removed.

    Returns:
        The string with every bracket expanded.

    Raises:
        ValueError: if an opening bracket has no matching closing bracket,
            or a ``*(`` is not directly preceded by a decimal factor.
    """
    while True:
        start = s.find('(')
        if start == -1:
            break

        # Walk forward to the ')' that balances the '(' at `start`.
        count = 1  # Number of hanging open brackets
        p = start + 1
        while p < len(s):
            count += (s[p] == '(') - (s[p] == ')')
            if count == 0:
                break
            p += 1
        if count != 0:
            raise ValueError(f"Unbalanced parenthesis in '{s}'.")

        inner = s[start + 1:p]
        if start == 0 or s[start - 1] != '*':
            # Plain group: just drop the surrounding brackets.
            s = s[:start] + inner + s[p + 1:]
            continue

        # Only search up to and including this '(' so that the factor found
        # is the one attached to *this* bracket. Searching the whole string
        # could pick up a later 'N*(' and splice the text in the wrong place,
        # which made the loop grow the string without ever terminating.
        m = BRACKET_RE.search(s[:start + 1])
        if m is None:
            raise ValueError(f"Failed to parse '{s}'.")
        factor = int(m.group('factor'))
        # join() gives exactly `factor` copies, including none for factor 0.
        s = s[:m.start('factor')] + ','.join([inner] * factor) + s[p + 1:]
    return s