Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/util.py: 40%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# util.py
2import contextlib
3import re
4from functools import lru_cache, wraps
5import inspect
6import itertools
7import types
8from typing import Callable, Union, Iterable, TypeVar, cast, Any
9import warnings
11from .warnings import PyparsingDeprecationWarning, PyparsingDiagnosticWarning
# A single backslash character; written as chr(92) to avoid string-escaping confusion.
_bslash = chr(92)
# TypeVar bound to Callable; preserves the decorated callable's type through wrappers.
C = TypeVar("C", bound=Callable)
class __config_flags:
    """Internal class for defining compatibility and debugging flags"""

    # All flag names that may be set on this class (populated by subclasses).
    _all_names: list[str] = []
    # Flag names that are read-only; attempts to change them only emit a warning.
    _fixed_names: list[str] = []
    # Human-readable noun used in warning and error messages.
    _type_desc = "configuration"

    @classmethod
    def _set(cls, dname, value):
        # Set flag `dname` to `value`. Fixed flags warn and are left unchanged;
        # unknown flag names raise ValueError.
        if dname in cls._fixed_names:
            warnings.warn(
                f"{cls.__name__}.{dname} {cls._type_desc} is {str(getattr(cls, dname)).upper()}"
                f" and cannot be overridden",
                PyparsingDiagnosticWarning,
                stacklevel=3,
            )
            return
        if dname in cls._all_names:
            setattr(cls, dname, value)
        else:
            raise ValueError(f"no such {cls._type_desc} {dname!r}")

    # Convenience entry points: turn a named flag on or off.
    enable = classmethod(lambda cls, name: cls._set(name, True))
    disable = classmethod(lambda cls, name: cls._set(name, False))
@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
    """
    Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :meth:`ParserElement.parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # A location sitting just past a newline is column 1 of the following line.
    if 0 < loc < len(strg) and strg[loc - 1] == "\n":
        return 1
    # Otherwise measure the distance back to the most recent newline (or start).
    return loc - strg.rfind("\n", 0, loc)
@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :meth:`ParserElement.parse_string`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    # One more than the number of newlines preceding `loc`.
    preceding_newlines = strg.count("\n", 0, loc)
    return preceding_newlines + 1
@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
    """
    Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    # Line starts just after the previous newline (index -1 + 1 == 0 when none).
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # loc is on the final line; take everything to the end of the string
        return strg[start:]
    return strg[start:end]
class _UnboundedCache:
    """Simple unbounded cache; get/set/clear are bound closures over one dict."""

    def __init__(self):
        storage = {}
        lookup = storage.get
        # Unique sentinel distinguishing "missing" from a stored None value.
        self.not_in_cache = sentinel = object()

        def get(_, key):
            return lookup(key, sentinel)

        def set_(_, key, value):
            storage[key] = value

        def clear(_):
            storage.clear()

        self.size = None
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)
class _FifoCache:
    """Bounded cache that evicts entries in first-in-first-out order."""

    def __init__(self, size):
        storage = {}
        self.size = size
        # Unique sentinel distinguishing "missing" from a stored None value.
        self.not_in_cache = sentinel = object()
        fetch = storage.get
        evict = storage.pop

        def get(_, key):
            return fetch(key, sentinel)

        def set_(_, key, value):
            storage[key] = value
            while len(storage) > size:
                # dicts preserve insertion order, so the first key is the oldest
                evict(next(iter(storage)))

        def clear(_):
            storage.clear()

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)
class LRUMemo:
    """
    A memoizing mapping that retains `capacity` deleted items

    The memo tracks retained items by their access order; once `capacity` items
    are retained, the least recently used item is discarded.
    """

    def __init__(self, capacity):
        self._capacity = capacity
        self._active = {}
        self._memory = {}

    def __getitem__(self, key):
        if key in self._active:
            return self._active[key]
        # Refresh recency: pop and re-insert moves the key to the end of the
        # dict's insertion order. Missing keys raise KeyError here, as expected.
        self._memory[key] = self._memory.pop(key)
        return self._memory[key]

    def __setitem__(self, key, value):
        self._memory.pop(key, None)
        self._active[key] = value

    def __delitem__(self, key):
        if key not in self._active:
            return
        retained = self._active.pop(key)
        # Trim the oldest retained entries before remembering this one.
        stale_keys = list(self._memory)[: -(self._capacity + 1)]
        for stale in stale_keys:
            self._memory.pop(stale)
        self._memory[key] = retained

    def clear(self):
        self._active.clear()
        self._memory.clear()
class UnboundedMemo(dict):
    """
    A memoizing mapping that retains all deleted items
    """

    def __delitem__(self, key):
        # Deletion is deliberately a no-op so entries are retained forever.
        pass
def _escape_regex_range_chars(s: str) -> str:
    """Backslash-escape characters that are significant inside a regex range."""
    # Escape these chars: \ ^ - [ ] -- backslash must come first, otherwise
    # the backslashes added for the other characters would be escaped again.
    for special in ("\\", "^", "-", "[", "]"):
        s = s.replace(special, _bslash + special)
    # Render literal newline/tab as their escaped regex forms.
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return str(s)
class _GroupConsecutive:
    """
    Used as a callable `key` for itertools.groupby to group
    characters that are consecutive:

    .. testcode::

        from itertools import groupby
        from pyparsing.util import _GroupConsecutive

        grouped = groupby("abcdejkmpqrs", key=_GroupConsecutive())
        for index, group in grouped:
            print(tuple([index, list(group)]))

    prints:

    .. testoutput::

        (0, ['a', 'b', 'c', 'd', 'e'])
        (1, ['j', 'k'])
        (2, ['m'])
        (3, ['p', 'q', 'r', 's'])
    """

    def __init__(self) -> None:
        self.prev = 0
        self.counter = itertools.count()
        self.value = -1

    def __call__(self, char: str) -> int:
        code = ord(char)
        previous, self.prev = self.prev, code
        # A gap of more than 1 in the ordinals starts a new group; groups are
        # identified by successive values from the counter.
        if code - previous > 1:
            self.value = next(self.counter)
        return self.value
def _is_iterable(obj, _str_type=(str, bytes), _iter_exception=Exception):
    """Return True if obj is iterable, treating str/bytes as non-iterable."""
    # str's are iterable, but in pyparsing, we don't want to iterate over them
    if isinstance(obj, _str_type):
        return False

    try:
        iter(obj)
    except _iter_exception:  # noqa
        return False
    return True
def _escape_re_range_char(c: str) -> str:
    """Escape a single character if it is significant inside a regex range."""
    if c in r"\^-][":
        return "\\" + c
    return c
def _collapse_string_to_ranges(
    s: Union[str, Iterable[str]], re_escape: bool = True
) -> str:
    r"""
    Take a string or list of single-character strings, and return
    a string of the consecutive characters in that string collapsed
    into groups, as might be used in a regular expression '[a-z]'
    character set::

        'a' -> 'a' -> '[a]'
        'bc' -> 'bc' -> '[bc]'
        'defgh' -> 'd-h' -> '[d-h]'
        'fdgeh' -> 'd-h' -> '[d-h]'
        'jklnpqrtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Duplicates get collapsed out::

        'aaa' -> 'a' -> '[a]'
        'bcbccb' -> 'bc' -> '[bc]'
        'defghhgf' -> 'd-h' -> '[d-h]'
        'jklnpqrjjjtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Spaces are preserved::

        'ab c' -> ' a-c' -> '[ a-c]'

    Characters that are significant when defining regex ranges
    get escaped::

        'acde[]-' -> r'\-\[\]ac-e' -> r'[\-\[\]ac-e]'
    """

    # Developer notes:
    # - Do not optimize this code assuming that the given input string
    #   or internal lists will be short (such as in loading generators into
    #   lists to make it easier to find the last element); this method is also
    #   used to generate regex ranges for character sets in the pyparsing.unicode
    #   classes, and these can be _very_ long lists of strings

    # Choose the per-character escaping function once, up front.
    escape_re_range_char: Callable[[str], str]
    if re_escape:
        escape_re_range_char = _escape_re_range_char
    else:
        escape_re_range_char = lambda ss: ss

    ret = []

    # reduce input string to remove duplicates, and put in sorted order
    s_chars: list[str] = sorted(set(s))

    if len(s_chars) > 2:
        # find groups of characters that are consecutive (can be collapsed
        # down to "<first>-<last>")
        for _, chars in itertools.groupby(s_chars, key=_GroupConsecutive()):
            # _ is unimportant, is just used to identify groups
            # chars is an iterator of one or more consecutive characters
            # that comprise the current group
            first = last = next(chars)
            # Advance `last` to the group's final character without building a
            # full list; a one-character group raises ValueError on unpacking,
            # which leaves first == last.
            with contextlib.suppress(ValueError):
                *_, last = chars

            if first == last:
                # there was only a single char in this group
                ret.append(escape_re_range_char(first))

            elif last == chr(ord(first) + 1):
                # there were only 2 characters in this group
                # 'a','b' -> 'ab'
                ret.append(f"{escape_re_range_char(first)}{escape_re_range_char(last)}")

            else:
                # there were > 2 characters in this group, make into a range
                # 'c','d','e' -> 'c-e'
                ret.append(
                    f"{escape_re_range_char(first)}-{escape_re_range_char(last)}"
                )
    else:
        # only 1 or 2 chars were given to form into groups
        # 'a' -> ['a']
        # 'bc' -> ['b', 'c']
        # 'dg' -> ['d', 'g']
        # no need to list them with "-", just return as a list
        # (after escaping)
        ret = [escape_re_range_char(c) for c in s_chars]

    return "".join(ret)
def _flatten(ll: Iterable) -> list:
    """Recursively flatten nested iterables into a single list, keeping strings atomic."""
    flattened: list = []
    for element in ll:
        # Developer notes:
        # - do not collapse this section of code, isinstance checks are done
        #   in optimal order (str is the common case and must be tested before
        #   Iterable, since strings are themselves iterable)
        if isinstance(element, str):
            flattened.append(element)
        elif isinstance(element, Iterable):
            flattened.extend(_flatten(element))
        else:
            flattened.append(element)
    return flattened
def _convert_escaped_numerics_to_char(s: str) -> str:
    """Convert an escaped numeric character reference to the character it names."""
    if s == "0":
        return "\0"
    if len(s) == 3 and s.isdigit():
        # exactly three digits -> octal character code
        return chr(int(s, 8))
    if s.startswith(("u", "x")):
        # 'u' or 'x' prefix -> hexadecimal character code
        return chr(int(s[1:], 16))
    # not a recognized numeric escape; return unchanged
    return s
def make_compressed_re(
    word_list: Iterable[str],
    max_level: int = 2,
    *,
    non_capturing_groups: bool = True,
    _level: int = 1,
) -> str:
    """
    Create a regular expression string from a list of words, collapsing by common
    prefixes and optional suffixes.

    Calls itself recursively to build nested sublists for each group of suffixes
    that have a shared prefix.

    :param word_list: words to combine into a single regex; any iterable of
        strings, including a generator
    :param max_level: maximum recursion depth for prefix grouping
    :param non_capturing_groups: if True, emit ``(?:...)`` groups instead of ``(...)``
    :param _level: internal recursion counter; do not pass explicitly
    :raises ValueError: if ``word_list`` is empty or contains an empty string
    """

    def get_suffixes_from_common_prefixes(namelist: list[str]):
        # Yield (first_char, suffixes) pairs; namelist must already be sorted
        # so groupby sees equal first characters adjacently. Suffixes are
        # ordered longest-first so regex alternation prefers longer matches.
        if len(namelist) > 1:
            for prefix, suffixes in itertools.groupby(namelist, key=lambda s: s[:1]):
                yield prefix, sorted([s[1:] for s in suffixes], key=len, reverse=True)
        else:
            yield namelist[0][0], [namelist[0][1:]]

    # Materialize and dedupe the word list first (dict keys preserve insertion
    # order). Doing this before the validation below also makes generator
    # inputs safe: previously, the `"" in word_list` membership test silently
    # consumed a generator before it was ever turned into a list.
    word_list = list({}.fromkeys(word_list))

    if _level == 1:
        if not word_list:
            raise ValueError("no words given to make_compressed_re()")

        if "" in word_list:
            raise ValueError("word list cannot contain empty string")
    else:
        # internal recursive call, just return empty string if no words
        if not word_list:
            return ""

    if max_level == 0:
        # no grouping permitted: emit a longest-first alternation, or a simple
        # character class if every word is a single character
        if any(len(wd) > 1 for wd in word_list):
            return "|".join(
                sorted([re.escape(wd) for wd in word_list], key=len, reverse=True)
            )
        else:
            return f"[{''.join(_escape_regex_range_chars(wd) for wd in word_list)}]"

    ret = []
    sep = ""
    ncgroup = "?:" if non_capturing_groups else ""

    for initial, suffixes in get_suffixes_from_common_prefixes(sorted(word_list)):
        ret.append(sep)
        sep = "|"

        initial = re.escape(initial)

        # an empty suffix means the prefix alone is a complete word, so the
        # rest of the group becomes optional ('?')
        trailing = ""
        if "" in suffixes:
            trailing = "?"
            suffixes.remove("")

        if len(suffixes) > 1:
            if all(len(s) == 1 for s in suffixes):
                # all suffixes are single chars -> character class
                ret.append(
                    f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}"
                )
            else:
                if _level < max_level:
                    # recurse to compress the suffixes as their own word list
                    suffix_re = make_compressed_re(
                        sorted(suffixes),
                        max_level,
                        non_capturing_groups=non_capturing_groups,
                        _level=_level + 1,
                    )
                    ret.append(f"{initial}({ncgroup}{suffix_re}){trailing}")
                else:
                    # NOTE(review): this inner all-len-1 check is unreachable
                    # (the outer branch already established not all suffixes are
                    # single chars); retained to match upstream behavior.
                    if all(len(s) == 1 for s in suffixes):
                        ret.append(
                            f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}"
                        )
                    else:
                        suffixes.sort(key=len, reverse=True)
                        ret.append(
                            f"{initial}({ncgroup}{'|'.join(re.escape(s) for s in suffixes)}){trailing}"
                        )
        else:
            if suffixes:
                suffix = re.escape(suffixes[0])
                if len(suffix) > 1 and trailing:
                    # group a multi-char optional suffix so '?' applies to all of it
                    ret.append(f"{initial}({ncgroup}{suffix}){trailing}")
                else:
                    ret.append(f"{initial}{suffix}{trailing}")
            else:
                ret.append(initial)
    return "".join(ret)
def replaced_by_pep8(compat_name: str, fn: C) -> C:
    # Build a deprecation wrapper for `fn`, published under the legacy
    # camelCase name `compat_name`; calling it warns and delegates to `fn`.

    # Unwrap staticmethod/classmethod
    fn = getattr(fn, "__func__", fn)

    # (Presence of 'self' arg in signature is used by explain_exception() methods, so we take
    # some extra steps to add it if present in decorated function.)
    if ["self"] == list(inspect.signature(fn).parameters)[:1]:

        @wraps(fn)
        def _inner(self, *args, **kwargs):
            warnings.warn(
                f"{compat_name!r} deprecated - use {fn.__name__!r}",
                PyparsingDeprecationWarning,
                stacklevel=2,
            )
            return fn(self, *args, **kwargs)

    else:

        @wraps(fn)
        def _inner(*args, **kwargs):
            warnings.warn(
                f"{compat_name!r} deprecated - use {fn.__name__!r}",
                PyparsingDeprecationWarning,
                stacklevel=2,
            )
            return fn(*args, **kwargs)

    # wraps() copied fn's metadata; overwrite the pieces that should reflect
    # the legacy name and a deprecation notice instead.
    _inner.__doc__ = f"""
    .. deprecated:: 3.0.0
    Use :class:`{fn.__name__}` instead
    """
    _inner.__name__ = compat_name
    _inner.__annotations__ = fn.__annotations__
    # __kwdefaults__ is not copied by wraps(); fetch it from the function, or
    # from __init__ when a class was decorated, else clear it.
    if isinstance(fn, types.FunctionType):
        _inner.__kwdefaults__ = fn.__kwdefaults__  # type: ignore [attr-defined]
    elif isinstance(fn, type) and hasattr(fn, "__init__"):
        _inner.__kwdefaults__ = fn.__init__.__kwdefaults__  # type: ignore [misc,attr-defined]
    else:
        _inner.__kwdefaults__ = None  # type: ignore [attr-defined]
    _inner.__qualname__ = fn.__qualname__
    return cast(C, _inner)
def _to_pep8_name(s: str, _re_sub_pattern=re.compile(r"([a-z])([A-Z])")) -> str:
    """Convert a camelCase name to its snake_case (PEP 8) equivalent."""
    # insert '_' at each lower->upper transition, then lowercase the result
    return _re_sub_pattern.sub(r"\1_\2", s).lower()
def deprecate_argument(
    kwargs: dict[str, Any], arg_name: str, default_value=None, *, new_name: str = ""
) -> Any:
    """
    Fetch a deprecated keyword argument from ``kwargs``, warning if the caller
    supplied it; otherwise install ``default_value`` under that name.
    """
    if arg_name not in kwargs:
        # argument was not supplied - install the default, no warning needed
        kwargs[arg_name] = default_value
        return kwargs[arg_name]

    # caller used the deprecated name - point them at the replacement
    new_name = new_name or _to_pep8_name(arg_name)
    warnings.warn(
        f"{arg_name!r} argument is deprecated, use {new_name!r}",
        category=PyparsingDeprecationWarning,
        stacklevel=3,
    )
    return kwargs[arg_name]