Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/util.py: 42%


211 statements  

# util.py
import contextlib
import re
from functools import lru_cache, wraps
import inspect
import itertools
import types
from typing import Callable, Union, Iterable, TypeVar, cast
import warnings

_bslash = chr(92)
C = TypeVar("C", bound=Callable)


class __config_flags:
    """Internal class for defining compatibility and debugging flags"""

    _all_names: list[str] = []
    _fixed_names: list[str] = []
    _type_desc = "configuration"

    @classmethod
    def _set(cls, dname, value):
        if dname in cls._fixed_names:
            warnings.warn(
                f"{cls.__name__}.{dname} {cls._type_desc} is {str(getattr(cls, dname)).upper()}"
                f" and cannot be overridden",
                stacklevel=3,
            )
            return
        if dname in cls._all_names:
            setattr(cls, dname, value)
        else:
            raise ValueError(f"no such {cls._type_desc} {dname!r}")

    enable = classmethod(lambda cls, name: cls._set(name, True))
    disable = classmethod(lambda cls, name: cls._set(name, False))



@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
    """
    Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :meth:`ParserElement.parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    s = strg
    return 1 if 0 < loc < len(s) and s[loc - 1] == "\n" else loc - s.rfind("\n", 0, loc)



@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :meth:`ParserElement.parse_string`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    return strg.count("\n", 0, loc) + 1



@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
    """
    Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    last_cr = strg.rfind("\n", 0, loc)
    next_cr = strg.find("\n", loc)
    return strg[last_cr + 1 : next_cr] if next_cr >= 0 else strg[last_cr + 1 :]


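# Illustrative sketch of the three position helpers above (not part of the
# upstream module): `loc` is a 0-based offset into the string, while line and
# column numbering both start at 1.
#
#     >>> s = "ab\ncd"
#     >>> lineno(4, s), col(4, s), line(4, s)
#     (2, 2, 'cd')
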

class _UnboundedCache:
    def __init__(self):
        cache = {}
        cache_get = cache.get
        self.not_in_cache = not_in_cache = object()

        def get(_, key):
            return cache_get(key, not_in_cache)

        def set_(_, key, value):
            cache[key] = value

        def clear(_):
            cache.clear()

        self.size = None
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)



class _FifoCache:
    def __init__(self, size):
        cache = {}
        self.size = size
        self.not_in_cache = not_in_cache = object()
        cache_get = cache.get
        cache_pop = cache.pop

        def get(_, key):
            return cache_get(key, not_in_cache)

        def set_(_, key, value):
            cache[key] = value
            while len(cache) > size:
                # pop oldest element in cache by getting the first key
                cache_pop(next(iter(cache)))

        def clear(_):
            cache.clear()

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)


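# Sketch of how the FIFO cache above behaves (illustrative only; this is an
# internal helper, not a public pyparsing API):
#
#     >>> c = _FifoCache(2)
#     >>> c.set("a", 1); c.set("b", 2); c.set("c", 3)
#     >>> c.get("a") is c.not_in_cache   # "a" went in first, so it was evicted
#     True
#     >>> c.get("c")
#     3
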

class LRUMemo:
    """
    A memoizing mapping that retains `capacity` deleted items

    The memo tracks retained items by their access order; once `capacity` items
    are retained, the least recently used item is discarded.
    """

    def __init__(self, capacity):
        self._capacity = capacity
        self._active = {}
        self._memory = {}

    def __getitem__(self, key):
        try:
            return self._active[key]
        except KeyError:
            self._memory[key] = self._memory.pop(key)
            return self._memory[key]

    def __setitem__(self, key, value):
        self._memory.pop(key, None)
        self._active[key] = value

    def __delitem__(self, key):
        try:
            value = self._active.pop(key)
        except KeyError:
            pass
        else:
            oldest_keys = list(self._memory)[: -(self._capacity + 1)]
            for key_to_delete in oldest_keys:
                self._memory.pop(key_to_delete)
            self._memory[key] = value

    def clear(self):
        self._active.clear()
        self._memory.clear()


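# Hedged sketch of the LRUMemo contract described in its docstring (not part
# of the upstream module): a deleted key can still be read back, because the
# value is parked in the bounded "memory" dict rather than discarded.
#
#     >>> m = LRUMemo(capacity=2)
#     >>> m["a"] = 1
#     >>> del m["a"]   # moves the value from the active dict into retained memory
#     >>> m["a"]
#     1
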

class UnboundedMemo(dict):
    """
    A memoizing mapping that retains all deleted items
    """

    def __delitem__(self, key):
        pass



def _escape_regex_range_chars(s: str) -> str:
    # escape these chars: ^-[]
    for c in r"\^-[]":
        s = s.replace(c, _bslash + c)
    s = s.replace("\n", r"\n")
    s = s.replace("\t", r"\t")
    return str(s)



class _GroupConsecutive:
    """
    Used as a callable `key` for itertools.groupby to group
    characters that are consecutive:

    .. testcode::

        from itertools import groupby
        from pyparsing.util import _GroupConsecutive

        grouped = groupby("abcdejkmpqrs", key=_GroupConsecutive())
        for index, group in grouped:
            print(tuple([index, list(group)]))

    prints:

    .. testoutput::

        (0, ['a', 'b', 'c', 'd', 'e'])
        (1, ['j', 'k'])
        (2, ['m'])
        (3, ['p', 'q', 'r', 's'])
    """

    def __init__(self) -> None:
        self.prev = 0
        self.counter = itertools.count()
        self.value = -1

    def __call__(self, char: str) -> int:
        c_int = ord(char)
        self.prev, prev = c_int, self.prev
        if c_int - prev > 1:
            self.value = next(self.counter)
        return self.value



def _collapse_string_to_ranges(
    s: Union[str, Iterable[str]], re_escape: bool = True
) -> str:
    r"""
    Take a string or list of single-character strings, and return
    a string of the consecutive characters in that string collapsed
    into groups, as might be used in a regular expression '[a-z]'
    character set::

        'a' -> 'a' -> '[a]'
        'bc' -> 'bc' -> '[bc]'
        'defgh' -> 'd-h' -> '[d-h]'
        'fdgeh' -> 'd-h' -> '[d-h]'
        'jklnpqrtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Duplicates get collapsed out::

        'aaa' -> 'a' -> '[a]'
        'bcbccb' -> 'bc' -> '[bc]'
        'defghhgf' -> 'd-h' -> '[d-h]'
        'jklnpqrjjjtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Spaces are preserved::

        'ab c' -> ' a-c' -> '[ a-c]'

    Characters that are significant when defining regex ranges
    get escaped::

        'acde[]-' -> r'\-\[\]ac-e' -> r'[\-\[\]ac-e]'
    """

    # Developer notes:
    # - Do not optimize this code assuming that the given input string
    #   or internal lists will be short (such as in loading generators into
    #   lists to make it easier to find the last element); this method is also
    #   used to generate regex ranges for character sets in the pyparsing.unicode
    #   classes, and these can be _very_ long lists of strings

    def escape_re_range_char(c: str) -> str:
        return "\\" + c if c in r"\^-][" else c

    def no_escape_re_range_char(c: str) -> str:
        return c

    if not re_escape:
        escape_re_range_char = no_escape_re_range_char

    ret = []

    # reduce input string to remove duplicates, and put in sorted order
    s_chars: list[str] = sorted(set(s))

    if len(s_chars) > 2:
        # find groups of characters that are consecutive (can be collapsed
        # down to "<first>-<last>")
        for _, chars in itertools.groupby(s_chars, key=_GroupConsecutive()):
            # _ is unimportant, is just used to identify groups
            # chars is an iterator of one or more consecutive characters
            # that comprise the current group
            first = last = next(chars)
            with contextlib.suppress(ValueError):
                *_, last = chars

            if first == last:
                # there was only a single char in this group
                ret.append(escape_re_range_char(first))

            elif last == chr(ord(first) + 1):
                # there were only 2 characters in this group
                #   'a','b' -> 'ab'
                ret.append(f"{escape_re_range_char(first)}{escape_re_range_char(last)}")

            else:
                # there were > 2 characters in this group, make into a range
                #   'c','d','e' -> 'c-e'
                ret.append(
                    f"{escape_re_range_char(first)}-{escape_re_range_char(last)}"
                )
    else:
        # only 1 or 2 chars were given to form into groups
        #   'a' -> ['a']
        #   'bc' -> ['b', 'c']
        #   'dg' -> ['d', 'g']
        # no need to list them with "-", just return as a list
        # (after escaping)
        ret = [escape_re_range_char(c) for c in s_chars]

    return "".join(ret)


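# Illustrative usage of _collapse_string_to_ranges (a hedged sketch; the exact
# output below follows from the grouping behavior documented above, with
# uppercase letters sorting before lowercase):
#
#     >>> _collapse_string_to_ranges("abcdefXYZ")
#     'X-Za-f'
#     >>> bool(re.fullmatch(f"[{_collapse_string_to_ranges('abcdefXYZ')}]+", "decafZ"))
#     True
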

def _flatten(ll: Iterable) -> list:
    # Flatten an arbitrarily nested iterable into a single flat list;
    # strings are treated as atomic values, not as iterables of characters.
    ret = []
    to_visit = [*ll]
    while to_visit:
        i = to_visit.pop(0)
        if isinstance(i, Iterable) and not isinstance(i, str):
            to_visit[:0] = i
        else:
            ret.append(i)
    return ret


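# Hedged example of what _flatten produces (not part of the upstream module):
#
#     >>> _flatten([1, [2, [3, 4]], "ab", 5])
#     [1, 2, 3, 4, 'ab', 5]
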

def make_compressed_re(
    word_list: Iterable[str],
    max_level: int = 2,
    *,
    non_capturing_groups: bool = True,
    _level: int = 1,
) -> str:
    """
    Create a regular expression string from a list of words, collapsing by common
    prefixes and optional suffixes.

    Calls itself recursively to build nested sublists for each group of suffixes
    that have a shared prefix.
    """

    def get_suffixes_from_common_prefixes(namelist: list[str]):
        if len(namelist) > 1:
            for prefix, suffixes in itertools.groupby(namelist, key=lambda s: s[:1]):
                yield prefix, sorted([s[1:] for s in suffixes], key=len, reverse=True)
        else:
            yield namelist[0][0], [namelist[0][1:]]

    if _level == 1:
        if not word_list:
            raise ValueError("no words given to make_compressed_re()")

        if "" in word_list:
            raise ValueError("word list cannot contain empty string")
    else:
        # internal recursive call, just return empty string if no words
        if not word_list:
            return ""

    # dedupe the word list
    word_list = list({}.fromkeys(word_list))

    if max_level == 0:
        if any(len(wd) > 1 for wd in word_list):
            return "|".join(
                sorted([re.escape(wd) for wd in word_list], key=len, reverse=True)
            )
        else:
            return f"[{''.join(_escape_regex_range_chars(wd) for wd in word_list)}]"

    ret = []
    sep = ""
    ncgroup = "?:" if non_capturing_groups else ""

    for initial, suffixes in get_suffixes_from_common_prefixes(sorted(word_list)):
        ret.append(sep)
        sep = "|"

        initial = re.escape(initial)

        trailing = ""
        if "" in suffixes:
            trailing = "?"
            suffixes.remove("")

        if len(suffixes) > 1:
            if all(len(s) == 1 for s in suffixes):
                ret.append(
                    f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}"
                )
            else:
                if _level < max_level:
                    suffix_re = make_compressed_re(
                        sorted(suffixes),
                        max_level,
                        non_capturing_groups=non_capturing_groups,
                        _level=_level + 1,
                    )
                    ret.append(f"{initial}({ncgroup}{suffix_re}){trailing}")
                else:
                    if all(len(s) == 1 for s in suffixes):
                        ret.append(
                            f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}"
                        )
                    else:
                        suffixes.sort(key=len, reverse=True)
                        ret.append(
                            f"{initial}({ncgroup}{'|'.join(re.escape(s) for s in suffixes)}){trailing}"
                        )
        else:
            if suffixes:
                suffix = re.escape(suffixes[0])
                if len(suffix) > 1 and trailing:
                    ret.append(f"{initial}({ncgroup}{suffix}){trailing}")
                else:
                    ret.append(f"{initial}{suffix}{trailing}")
            else:
                ret.append(initial)
    return "".join(ret)


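# Hedged usage sketch for make_compressed_re (illustrative word list, not from
# the upstream module): the words collapse onto their shared prefix, and the
# resulting pattern still matches every input word.
#
#     >>> patt = make_compressed_re(["apple", "apples", "applet"])
#     >>> all(re.fullmatch(patt, w) for w in ("apple", "apples", "applet"))
#     True
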

def replaced_by_pep8(compat_name: str, fn: C) -> C:
    # In a future version, uncomment the code in the internal _inner() functions
    # to begin emitting DeprecationWarnings.

    # Unwrap staticmethod/classmethod
    fn = getattr(fn, "__func__", fn)

    # (Presence of 'self' arg in signature is used by explain_exception() methods, so we take
    # some extra steps to add it if present in decorated function.)
    if ["self"] == list(inspect.signature(fn).parameters)[:1]:

        @wraps(fn)
        def _inner(self, *args, **kwargs):
            # warnings.warn(
            #     f"Deprecated - use {fn.__name__}", DeprecationWarning, stacklevel=2
            # )
            return fn(self, *args, **kwargs)

    else:

        @wraps(fn)
        def _inner(*args, **kwargs):
            # warnings.warn(
            #     f"Deprecated - use {fn.__name__}", DeprecationWarning, stacklevel=2
            # )
            return fn(*args, **kwargs)

    _inner.__doc__ = f"""
        .. deprecated:: 3.0.0
           Use :class:`{fn.__name__}` instead
    """
    _inner.__name__ = compat_name
    _inner.__annotations__ = fn.__annotations__
    if isinstance(fn, types.FunctionType):
        _inner.__kwdefaults__ = fn.__kwdefaults__  # type: ignore [attr-defined]
    elif isinstance(fn, type) and hasattr(fn, "__init__"):
        _inner.__kwdefaults__ = fn.__init__.__kwdefaults__  # type: ignore [misc,attr-defined]
    else:
        _inner.__kwdefaults__ = None  # type: ignore [attr-defined]
    _inner.__qualname__ = fn.__qualname__
    return cast(C, _inner)
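

# Hedged sketch of how a camelCase compatibility alias could be wired up with
# replaced_by_pep8 (hypothetical function names, illustrative only):
#
#     def parse_widget(data): ...                                  # new snake_case API
#     parseWidget = replaced_by_pep8("parseWidget", parse_widget)  # legacy alias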