Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/util.py: 40%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

238 statements  

1# util.py 

2import contextlib 

3import re 

4from functools import lru_cache, wraps 

5import inspect 

6import itertools 

7import types 

8from typing import Callable, Union, Iterable, TypeVar, cast, Any 

9import warnings 

10 

11from .warnings import PyparsingDeprecationWarning, PyparsingDiagnosticWarning 

12 

13_bslash = chr(92) 

14C = TypeVar("C", bound=Callable) 

15 

16 

class __config_flags:
    """Internal base class for grouping compatibility and debugging flags.

    Subclasses list their flag attribute names in ``_all_names``; names also
    present in ``_fixed_names`` are locked and cannot be changed at runtime.
    """

    _all_names: list[str] = []
    _fixed_names: list[str] = []
    _type_desc = "configuration"

    @classmethod
    def _set(cls, dname, value):
        # Locked flags are never modified - emit a diagnostic and bail out.
        if dname in cls._fixed_names:
            warnings.warn(
                f"{cls.__name__}.{dname} {cls._type_desc} is {str(getattr(cls, dname)).upper()}"
                f" and cannot be overridden",
                PyparsingDiagnosticWarning,
                stacklevel=3,
            )
            return
        if dname not in cls._all_names:
            raise ValueError(f"no such {cls._type_desc} {dname!r}")
        setattr(cls, dname, value)

    @classmethod
    def enable(cls, name):
        """Set the named flag to True."""
        return cls._set(name, True)

    @classmethod
    def disable(cls, name):
        """Set the named flag to False."""
        return cls._set(name, False)

41 

42 

@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
    """
    Return the current column of ``loc`` within ``strg``, with newlines
    treated as line separators. The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :meth:`ParserElement.parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a location immediately after a newline is column 1 of the next line
    if 0 < loc < len(strg) and strg[loc - 1] == "\n":
        return 1
    # otherwise, distance from the most recent newline before loc
    return loc - strg.rfind("\n", 0, loc)

58 

59 

@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
    """Return the current line number of ``loc`` within ``strg``, counting
    newlines as line separators. The first line is number 1.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :meth:`ParserElement.parse_string`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    # number of newlines before loc, plus one for 1-based numbering
    preceding_newlines = strg.count("\n", 0, loc)
    return preceding_newlines + 1

72 

73 

@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
    """
    Return the full line of text containing position ``loc`` within ``strg``,
    counting newlines as line separators.
    """
    # line starts just after the previous newline (rfind yields -1 -> start 0)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # loc is on the final line; no terminating newline
        return strg[start:]
    return strg[start:end]

82 

83 

class _UnboundedCache:
    """Never-evicting cache; get/set/clear are closures bound as instance methods.

    The closure-over-a-local-dict pattern keeps lookups fast (locals instead of
    attribute access) and avoids exposing the backing dict as an attribute.
    """

    def __init__(self):
        store = {}
        store_get = store.get
        # sentinel returned by get() for missing keys
        self.not_in_cache = miss = object()

        def get(_, key):
            return store_get(key, miss)

        def set_(_, key, value):
            store[key] = value

        def clear(_):
            store.clear()

        # size of None signals "unbounded" to callers
        self.size = None
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)

103 

104 

class _FifoCache:
    """Bounded cache that evicts entries in insertion (FIFO) order.

    Like _UnboundedCache, the methods are closures over a local dict, bound as
    instance methods for fast lookup.
    """

    def __init__(self, size):
        store = {}
        self.size = size
        # sentinel returned by get() for missing keys
        self.not_in_cache = miss = object()
        store_get = store.get
        store_pop = store.pop

        def get(_, key):
            return store_get(key, miss)

        def set_(_, key, value):
            store[key] = value
            while len(store) > size:
                # dicts preserve insertion order, so the first key is the oldest
                store_pop(next(iter(store)))

        def clear(_):
            store.clear()

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)

128 

129 

class LRUMemo:
    """
    A memoizing mapping that retains `capacity` deleted items

    Entries live in ``_active`` until deleted, then move to ``_memory``, which
    is kept roughly bounded by ``capacity``; the least recently used retained
    item is discarded first.
    """

    def __init__(self, capacity):
        self._capacity = capacity
        self._active = {}
        self._memory = {}

    def __getitem__(self, key):
        # live entries win; retained entries are refreshed by re-inserting at
        # the end of the dict (dicts keep insertion order = recency order here)
        if key in self._active:
            return self._active[key]
        value = self._memory.pop(key)  # KeyError propagates for unknown keys
        self._memory[key] = value
        return value

    def __setitem__(self, key, value):
        # a fresh assignment supersedes any retained copy
        self._memory.pop(key, None)
        self._active[key] = value

    def __delitem__(self, key):
        # deleting an unknown key is a silent no-op
        if key not in self._active:
            return
        value = self._active.pop(key)
        # trim the oldest retained entries before retaining this one
        stale_keys = list(self._memory)[: -(self._capacity + 1)]
        for stale in stale_keys:
            self._memory.pop(stale)
        self._memory[key] = value

    def clear(self):
        self._active.clear()
        self._memory.clear()

168 

169 

class UnboundedMemo(dict):
    """
    A memoizing mapping that retains all deleted items
    """

    def __delitem__(self, key):
        # deliberately a no-op: every memoized entry is kept forever
        return

177 

178 

def _escape_regex_range_chars(s: str) -> str:
    """Backslash-escape characters that are special inside a regex ``[]`` range."""
    out = s
    # the backslash must be escaped first, so later escapes are not doubled
    for special in ("\\", "^", "-", "[", "]"):
        out = out.replace(special, "\\" + special)
    # spell whitespace as escape sequences rather than literal characters
    out = out.replace("\n", "\\n").replace("\t", "\\t")
    return str(out)

186 

187 

class _GroupConsecutive:
    """
    Stateful callable usable as the ``key`` for :func:`itertools.groupby` to
    cluster characters whose code points are consecutive.

    Example::

        from itertools import groupby
        for index, group in groupby("abcdejkmpqrs", key=_GroupConsecutive()):
            print((index, list(group)))

    prints::

        (0, ['a', 'b', 'c', 'd', 'e'])
        (1, ['j', 'k'])
        (2, ['m'])
        (3, ['p', 'q', 'r', 's'])
    """

    def __init__(self) -> None:
        self.prev = 0
        self.counter = itertools.count()
        self.value = -1

    def __call__(self, char: str) -> int:
        c_int = ord(char)
        prev = self.prev
        self.prev = c_int
        # a gap in code points starts a new group id
        if c_int - prev > 1:
            self.value = next(self.counter)
        return self.value

223 

224 

def _is_iterable(obj, _str_type=(str, bytes), _iter_exception=Exception):
    """Return True if obj is iterable, treating str/bytes as non-iterable."""
    # pyparsing treats strings as atomic values, not as character sequences
    if isinstance(obj, _str_type):
        return False
    try:
        iter(obj)
    except _iter_exception:  # noqa
        return False
    return True

236 

237 

def _escape_re_range_char(c: str) -> str:
    """Escape ``c`` with a backslash if it is special inside a regex character class."""
    if c in r"\^-][":
        return "\\" + c
    return c

240 

241 

def _collapse_string_to_ranges(
    s: Union[str, Iterable[str]], re_escape: bool = True
) -> str:
    r"""
    Take a string or list of single-character strings, and return
    a string of the consecutive characters in that string collapsed
    into groups, as might be used in a regular expression '[a-z]'
    character set::

        'a' -> 'a' -> '[a]'
        'bc' -> 'bc' -> '[bc]'
        'defgh' -> 'd-h' -> '[d-h]'
        'fdgeh' -> 'd-h' -> '[d-h]'
        'jklnpqrtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Duplicates get collapsed out::

        'aaa' -> 'a' -> '[a]'
        'bcbccb' -> 'bc' -> '[bc]'
        'defghhgf' -> 'd-h' -> '[d-h]'
        'jklnpqrjjjtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Spaces are preserved::

        'ab c' -> ' a-c' -> '[ a-c]'

    Characters that are significant when defining regex ranges
    get escaped::

        'acde[]-' -> r'\-\[\]ac-e' -> r'[\-\[\]ac-e]'
    """

    # Developer notes:
    # - Do not optimize this code assuming that the given input string
    #   or internal lists will be short (such as in loading generators into
    #   lists to make it easier to find the last element); this method is also
    #   used to generate regex ranges for character sets in the pyparsing.unicode
    #   classes, and these can be _very_ long lists of strings

    escape_re_range_char: Callable[[str], str]
    if re_escape:
        escape_re_range_char = _escape_re_range_char
    else:
        # identity - caller wants raw, unescaped characters
        escape_re_range_char = lambda ss: ss

    ret: list[str] = []

    # reduce input string to remove duplicates, and put in sorted order
    s_chars: list[str] = sorted(set(s))

    if len(s_chars) > 2:
        # find groups of characters that are consecutive (can be collapsed
        # down to "<first>-<last>")
        # NOTE: _GroupConsecutive is a stateful key callable, so a fresh
        # instance is constructed for this groupby call
        for _, chars in itertools.groupby(s_chars, key=_GroupConsecutive()):
            # _ is unimportant, is just used to identify groups
            # chars is an iterator of one or more consecutive characters
            # that comprise the current group
            first = last = next(chars)
            # drain the group iterator to find its last element; a ValueError
            # from the starred unpack means the group had only one character,
            # leaving last == first
            with contextlib.suppress(ValueError):
                *_, last = chars

            if first == last:
                # there was only a single char in this group
                ret.append(escape_re_range_char(first))

            elif last == chr(ord(first) + 1):
                # there were only 2 characters in this group
                # 'a','b' -> 'ab'
                ret.append(f"{escape_re_range_char(first)}{escape_re_range_char(last)}")

            else:
                # there were > 2 characters in this group, make into a range
                # 'c','d','e' -> 'c-e'
                ret.append(
                    f"{escape_re_range_char(first)}-{escape_re_range_char(last)}"
                )
    else:
        # only 1 or 2 chars were given to form into groups
        #   'a' -> ['a']
        #   'bc' -> ['b', 'c']
        #   'dg' -> ['d', 'g']
        # no need to list them with "-", just return as a list
        # (after escaping)
        ret = [escape_re_range_char(c) for c in s_chars]

    return "".join(ret)

328 

329 

def _flatten(ll: Iterable) -> list:
    """Recursively flatten nested iterables into one list, keeping strings whole."""
    flat: list = []
    for item in ll:
        # Developer notes:
        # - do not collapse this section of code, isinstance checks are done
        #   in optimal order (str first, since strings are also Iterable)
        if isinstance(item, str):
            flat.append(item)
        elif isinstance(item, Iterable):
            flat.extend(_flatten(item))
        else:
            flat.append(item)
    return flat

343 

344 

def _convert_escaped_numerics_to_char(s: str) -> str:
    """Convert a numeric escape body ('0', octal 'nnn', 'xhh', 'uhhhh') to its character.

    Returns ``s`` unchanged when it does not match any recognized form.
    """
    if s == "0":
        return "\0"
    if len(s) == 3 and s.isdigit():
        # exactly three digits -> octal escape
        return chr(int(s, 8))
    if s[:1] in ("u", "x"):
        # hex escape; remainder after the prefix is the code point
        return chr(int(s[1:], 16))
    return s

353 

354 

def make_compressed_re(
    word_list: Iterable[str],
    max_level: int = 2,
    *,
    non_capturing_groups: bool = True,
    _level: int = 1,
) -> str:
    """
    Create a regular expression string from a list of words, collapsing by common
    prefixes and optional suffixes.

    Calls itself recursively to build nested sublists for each group of suffixes
    that have a shared prefix.

    :param word_list: words to combine into a single alternation regex
    :param max_level: maximum recursion depth for prefix grouping
    :param non_capturing_groups: emit ``(?:...)`` groups instead of ``(...)``
    :param _level: internal recursion depth counter - do not pass explicitly
    :raises ValueError: if no words are given, or a word is the empty string
    """

    def get_suffixes_from_common_prefixes(namelist: list[str]):
        # Group sorted names by first character, yielding
        # (prefix_char, suffixes sorted longest-first) pairs.
        if len(namelist) > 1:
            for prefix, suffixes in itertools.groupby(namelist, key=lambda s: s[:1]):
                yield prefix, sorted([s[1:] for s in suffixes], key=len, reverse=True)
        else:
            yield namelist[0][0], [namelist[0][1:]]

    # Materialize and dedupe the words up front (dict keys preserve first-seen
    # order). Doing this BEFORE any membership tests keeps generator/iterator
    # inputs - allowed by the Iterable[str] annotation - from being exhausted
    # by the validation checks below.
    word_list = list({}.fromkeys(word_list))

    if _level == 1:
        if not word_list:
            raise ValueError("no words given to make_compressed_re()")

        if "" in word_list:
            raise ValueError("word list cannot contain empty string")
    else:
        # internal recursive call, just return empty string if no words
        if not word_list:
            return ""

    if max_level == 0:
        # recursion budget exhausted: emit a plain alternation, or a simple
        # character class when every word is a single character
        if any(len(wd) > 1 for wd in word_list):
            return "|".join(
                sorted([re.escape(wd) for wd in word_list], key=len, reverse=True)
            )
        return f"[{''.join(_escape_regex_range_chars(wd) for wd in word_list)}]"

    ret = []
    sep = ""
    ncgroup = "?:" if non_capturing_groups else ""

    for initial, suffixes in get_suffixes_from_common_prefixes(sorted(word_list)):
        ret.append(sep)
        sep = "|"

        initial = re.escape(initial)

        # an empty suffix means the bare prefix is itself a word, so whatever
        # follows the prefix is optional
        trailing = ""
        if "" in suffixes:
            trailing = "?"
            suffixes.remove("")

        if len(suffixes) > 1:
            if all(len(s) == 1 for s in suffixes):
                # all remaining suffixes are single chars -> character class
                ret.append(
                    f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}"
                )
            elif _level < max_level:
                # recurse to compress the suffix group by ITS common prefixes
                suffix_re = make_compressed_re(
                    sorted(suffixes),
                    max_level,
                    non_capturing_groups=non_capturing_groups,
                    _level=_level + 1,
                )
                ret.append(f"{initial}({ncgroup}{suffix_re}){trailing}")
            else:
                # depth limit reached: plain alternation, longest first so
                # regex alternation prefers the longest match
                # (the original also re-tested all-len-1 here, but that branch
                # was unreachable - the identical test above already failed)
                suffixes.sort(key=len, reverse=True)
                ret.append(
                    f"{initial}({ncgroup}{'|'.join(re.escape(s) for s in suffixes)}){trailing}"
                )
        elif suffixes:
            suffix = re.escape(suffixes[0])
            if len(suffix) > 1 and trailing:
                # group a multi-char suffix so '?' applies to all of it
                ret.append(f"{initial}({ncgroup}{suffix}){trailing}")
            else:
                ret.append(f"{initial}{suffix}{trailing}")
        else:
            # only the bare prefix remains
            ret.append(initial)
    return "".join(ret)

448 

449 

def replaced_by_pep8(compat_name: str, fn: C) -> C:
    """
    Build a deprecated alias for ``fn`` under its legacy (pre-PEP8) name.

    Returns a wrapper that emits a PyparsingDeprecationWarning mentioning
    ``compat_name`` and then delegates to ``fn``, while reporting
    ``compat_name`` as its own name.
    """
    # Unwrap staticmethod/classmethod
    fn = getattr(fn, "__func__", fn)

    # (Presence of 'self' arg in signature is used by explain_exception() methods, so we take
    # some extra steps to add it if present in decorated function.)
    if ["self"] == list(inspect.signature(fn).parameters)[:1]:

        @wraps(fn)
        def _inner(self, *args, **kwargs):
            warnings.warn(
                f"{compat_name!r} deprecated - use {fn.__name__!r}",
                PyparsingDeprecationWarning,
                stacklevel=2,
            )
            return fn(self, *args, **kwargs)

    else:

        @wraps(fn)
        def _inner(*args, **kwargs):
            warnings.warn(
                f"{compat_name!r} deprecated - use {fn.__name__!r}",
                PyparsingDeprecationWarning,
                stacklevel=2,
            )
            return fn(*args, **kwargs)

    # Overwrite selected metadata copied in by @wraps so the alias reports its
    # legacy identity (name, deprecation docstring) while keeping fn's
    # annotations and keyword defaults.
    _inner.__doc__ = f"""
    .. deprecated:: 3.0.0
        Use :class:`{fn.__name__}` instead
    """
    _inner.__name__ = compat_name
    _inner.__annotations__ = fn.__annotations__
    if isinstance(fn, types.FunctionType):
        _inner.__kwdefaults__ = fn.__kwdefaults__  # type: ignore [attr-defined]
    elif isinstance(fn, type) and hasattr(fn, "__init__"):
        # fn is a class: take keyword defaults from its constructor
        _inner.__kwdefaults__ = fn.__init__.__kwdefaults__  # type: ignore [misc,attr-defined]
    else:
        _inner.__kwdefaults__ = None  # type: ignore [attr-defined]
    _inner.__qualname__ = fn.__qualname__
    return cast(C, _inner)

493 

494 

def _to_pep8_name(s: str, _re_sub_pattern=re.compile(r"([a-z])([A-Z])")) -> str:
    """Convert a camelCase name to snake_case."""
    # insert '_' at each lower->upper boundary, then lowercase the whole name
    return _re_sub_pattern.sub(r"\1_\2", s).lower()

498 

499 

def deprecate_argument(
    kwargs: dict[str, Any], arg_name: str, default_value=None, *, new_name: str = ""
) -> Any:
    """Handle a deprecated keyword argument in ``kwargs``.

    If ``arg_name`` was passed, warn that it is deprecated (suggesting
    ``new_name``, or its snake_case form if not given); otherwise install
    ``default_value`` under ``arg_name``. Returns the effective value.
    """
    if arg_name not in kwargs:
        kwargs[arg_name] = default_value
        return default_value

    suggested = new_name or _to_pep8_name(arg_name)
    warnings.warn(
        f"{arg_name!r} argument is deprecated, use {suggested!r}",
        category=PyparsingDeprecationWarning,
        stacklevel=3,
    )
    return kwargs[arg_name]