Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pyparsing/core.py: 44%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# core.py
3#
4from __future__ import annotations
6import collections.abc
7from collections import deque
8import os
9import typing
10from typing import (
11 Any,
12 Callable,
13 Generator,
14 NamedTuple,
15 Sequence,
16 TextIO,
17 Union,
18 cast,
19)
20from abc import ABC, abstractmethod
21from enum import Enum
22import string
23import copy
24import warnings
25import re
26import sys
27from collections.abc import Iterable
28import traceback
29import types
30from operator import itemgetter
31from functools import wraps
32from threading import RLock
33from pathlib import Path
35from .util import (
36 _FifoCache,
37 _UnboundedCache,
38 __config_flags,
39 _collapse_string_to_ranges,
40 _escape_regex_range_chars,
41 _bslash,
42 _flatten,
43 LRUMemo as _LRUMemo,
44 UnboundedMemo as _UnboundedMemo,
45 replaced_by_pep8,
46)
47from .exceptions import *
48from .actions import *
49from .results import ParseResults, _ParseResultsWithOffset
50from .unicode import pyparsing_unicode
52_MAX_INT = sys.maxsize
53str_type: tuple[type, ...] = (str, bytes)
55#
56# Copyright (c) 2003-2022 Paul T. McGuire
57#
58# Permission is hereby granted, free of charge, to any person obtaining
59# a copy of this software and associated documentation files (the
60# "Software"), to deal in the Software without restriction, including
61# without limitation the rights to use, copy, modify, merge, publish,
62# distribute, sublicense, and/or sell copies of the Software, and to
63# permit persons to whom the Software is furnished to do so, subject to
64# the following conditions:
65#
66# The above copyright notice and this permission notice shall be
67# included in all copies or substantial portions of the Software.
68#
69# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
70# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
71# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
72# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
73# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
74# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
75# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
76#
78from functools import cached_property
class __compat__(__config_flags):
    """
    Cross-version compatibility switches for pyparsing features that are
    scheduled for release in a future version. Setting a value in this
    configuration to True enables that feature in the current version, so that
    code can be developed and tested against it ahead of time.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public (non-underscore) name defined so far in this class body is a flag
    _all_names = [attr for attr in locals() if not attr.startswith("_")]
    # NOTE(review): presumably the flags whose value can no longer be changed -
    # confirm against __config_flags
    _fixed_names = ["collect_all_And_tokens"]
class __diag__(__config_flags):
    """
    Holder for the global diagnostic flags; every flag starts out ``False``.
    See :class:`Diagnostics` for a description of the individual flags.
    """

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # public names defined above, then the subsets selected by name prefix
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag whose name starts with ``warn``."""
        for warning_name in cls._warning_names:
            cls.enable(warning_name)
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    Each member's name matches the name of a flag attribute on ``__diag__``;
    :func:`enable_diag` and :func:`disable_diag` look the flag up by that name.

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - (not described in the original docstring;
      presumably warns when ``'<<'`` is combined with a :class:`MatchFirst` expression - TODO confirm)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on the global pyparsing diagnostic flag named by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off the global pyparsing diagnostic flag named by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
def enable_all_warnings() -> None:
    """
    Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).

    Delegates to ``__diag__.enable_all_warnings()``, which enables every flag
    whose name starts with ``warn``.
    """
    __diag__.enable_all_warnings()
181# hide abstract class
182del __config_flags
185def _should_enable_warnings(
186 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
187) -> bool:
188 enable = bool(warn_env_var)
189 for warn_opt in cmd_line_warn_options:
190 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split(
191 ":"
192 )[:5]
193 if not w_action.lower().startswith("i") and (
194 not (w_message or w_category or w_module) or w_module == "pyparsing"
195 ):
196 enable = True
197 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""):
198 enable = False
199 return enable
202if _should_enable_warnings(
203 sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
204):
205 enable_all_warnings()
# build list of single arg builtins, that can be used as parse actions
# (_trim_arity wraps any of these so that it is called with the tokens only)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# return annotation used by parseImpl: a (location, tokens) pair
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition predicate may take 0 to 3 arguments, mirroring the
# fn(), fn(toks), fn(loc, toks), fn(s, loc, toks) parse action call forms
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# callback signatures: (instring, loc, expr, exception) for fail actions, and the
# debug variants which take an extra trailing cache_hit flag
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# character-set strings for building expressions
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])
242class _ParseActionIndexError(Exception):
243 """
244 Internal wrapper around IndexError so that IndexErrors raised inside
245 parse actions aren't misinterpreted as IndexErrors raised inside
246 ParserElement parseImpl methods.
247 """
249 def __init__(self, msg: str, exc: BaseException):
250 self.msg: str = msg
251 self.exc: BaseException = exc
254_trim_arity_call_line: traceback.StackSummary = None # type: ignore[assignment]
255pa_call_line_synth = ()
def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that accepts the full positional argument list and calls
    ``func`` with a trailing slice of it. On the first calls, each TypeError
    that is raised at the call line itself (i.e. an argument-count mismatch)
    causes one more leading argument to be dropped, up to ``max_limit``; once
    a call succeeds the number of dropped arguments is remembered and reused.
    A TypeError raised anywhere else is re-raised, and an IndexError escaping
    ``func`` is re-raised wrapped in ``_ParseActionIndexError``.

    Callables listed in ``_single_arg_builtins`` are wrapped so that they
    always receive just the third argument (the tokens).
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments dropped so far / whether that number is settled
    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # compare the (filename, lineno) of the deepest of the first two
                    # traceback frames with the synthesized location of the call above
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            # drop one more leading argument and try again
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate function (one returning ``True`` or ``False``) so that it
    can be used wherever a parse action is expected. Useful where
    :class:`ParserElement.add_condition` is not available, such as when adding
    a condition to an operator level in :class:`infix_notation`.

    Optional keyword arguments:

    - ``message`` - custom message for the raised exception; when ``None``,
      ``"failed user-defined condition"`` is used
    - ``fatal`` - if True, a failing condition raises :class:`ParseFatalException`
      to stop parsing immediately; otherwise it raises :class:`ParseException`

    """
    failure_msg = "failed user-defined condition" if message is None else message
    failure_cls = ParseFatalException if fatal else ParseException
    # allow predicates that take fewer than the full (s, l, t) arguments
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return
        raise failure_cls(s, l, failure_msg)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "about to try" debug action: print the expression and location
    being attempted, the source line at that location, and a caret under the
    current column. Output is prefixed with ``*`` when ``cache_hit`` is true.
    """
    marker = "*" if cache_hit else ""
    header = f"{marker}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
    source_line = f" {line(loc, instring)}\n"
    caret_line = f" {' ' * (col(loc, instring) - 1)}^"
    print(header + source_line + caret_line)
363def _default_success_debug_action(
364 instring: str,
365 startloc: int,
366 endloc: int,
367 expr: ParserElement,
368 toks: ParseResults,
369 cache_hit: bool = False,
370):
371 cache_hit_str = "*" if cache_hit else ""
372 print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}")
375def _default_exception_debug_action(
376 instring: str,
377 loc: int,
378 expr: ParserElement,
379 exc: Exception,
380 cache_hit: bool = False,
381):
382 cache_hit_str = "*" if cache_hit else ""
383 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}")
def null_debug_action(*args):
    """No-op debug action: accepts any arguments and does nothing, so that
    debugging output can be suppressed during parsing."""
    return None
390class ParserElement(ABC):
391 """Abstract base level parser element class."""
393 DEFAULT_WHITE_CHARS: str = " \n\t\r"
394 verbose_stacktrace: bool = False
395 _literalStringClass: type = None # type: ignore[assignment]
@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of whitespace characters.

    Example::

        # default whitespace chars are space, <TAB> and newline
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # propagate the change to the expressions predefined by this module that
    # still follow the default whitespace setting
    for builtin_expr in _builtin_exprs:
        if not builtin_expr.copyDefaultWhiteChars:
            continue
        builtin_expr.whiteChars = set(chars)
@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Set class to be used for inclusion of string literals into a parser.

    The class is stored on ``ParserElement._literalStringClass``, so the
    setting is global rather than per-expression.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    ParserElement._literalStringClass = cls
@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Generate one ``cls(obj, **class_kwargs)`` instance for every ``obj`` in ``seq``.

    Example::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    """
    for obj in seq:
        yield cls(obj, **class_kwargs)
class DebugActions(NamedTuple):
    # Optional per-expression debug callbacks; __init__ creates this with all
    # three set to None.
    # called before a match attempt
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match
    debug_match: typing.Optional[DebugSuccessAction]
    # called when the match attempt (or a parse action) raises
    debug_fail: typing.Optional[DebugExceptionAction]
def __init__(self, savelist: bool = False):
    """
    Set every per-expression attribute to its default value.

    ``savelist`` becomes the initial value of ``saveAsList``.
    """
    # actions, names and result handling
    self.parseAction: list[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    self.saveAsList = savelist

    # whitespace handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True

    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs: list[ParserElement] = []
    self.debug = False
    self.streamlined = False

    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg: Union[str, None] = ""

    # mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True

    # custom debug actions
    self.debugActions = self.DebugActions(
        debug_try=None, debug_match=None, debug_fail=None
    )

    # avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
    self.suppress_warnings_: list[Diagnostics] = []
    self.show_in_diagram = True
def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
    """
    Record ``warning_type`` as a diagnostic that should not be emitted for
    this expression. Returns ``self`` so calls can be chained.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parse_string("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self
def visit_all(self):
    """Generator over this expression and every sub-expression reachable
    from it through ``recurse()``, in breadth-first order, each yielded once.
    Typically just for internal use.
    """
    visited = set()
    pending = deque((self,))
    while pending:
        node = pending.popleft()

        # recursive grammars can reach the same expression again - skip it
        if node not in visited:
            visited.add(node)

            pending.extend(node.recurse())
            yield node
def copy(self) -> ParserElement:
    """
    Return a copy of this :class:`ParserElement`. The copy gets its own
    parse-action and ignore-expression lists, so different parse actions can
    be attached to the same parsing pattern without affecting the original.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    clone = copy.copy(self)
    # the shallow copy shares these lists with the original - give the clone its own
    clone.parseAction = list(self.parseAction)
    clone.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        # pick up the default whitespace setting in effect now
        clone.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return clone
def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> ParserElement:
    """
    Assign a name under which the matching tokens can be referenced as a
    nested attribute of the returned parse results.

    Results names normally behave like dict keys: a later value replaces any
    existing value. To keep every value captured for a particular results
    name, call ``set_results_name`` with ``list_all_matches`` = True
    (``listAllMatches`` is accepted as an equivalent keyword).

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    The abbreviated syntax ``expr("name")`` may be used in place of
    ``expr.set_results_name("name")`` - see :class:`__call__`. If
    ``list_all_matches`` is required, use ``expr("name*")``.

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # either spelling of the flag may request cumulative results
    if not listAllMatches:
        listAllMatches = list_all_matches
    return self._setResultsName(name, listAllMatches)
582 def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
583 if name is None:
584 return self
585 newself = self.copy()
586 if name.endswith("*"):
587 name = name[:-1]
588 list_all_matches = True
589 newself.resultsName = name
590 newself.modalResults = not list_all_matches
591 return newself
def set_break(self, break_flag: bool = True) -> ParserElement:
    """
    Invoke the Python debugger when this element is about to be parsed.
    Pass ``break_flag`` as ``True`` to enable, ``False`` to disable.
    Returns ``self``.
    """
    if not break_flag:
        # restore the parse method saved when the break was installed, if any
        current_parse = self._parse
        if hasattr(current_parse, "_originalParseMethod"):
            self._parse = current_parse._originalParseMethod  # type: ignore [method-assign]
        return self

    wrapped_parse = self._parse

    def breaker(instring, loc, do_actions=True, callPreParse=True):
        # this call to breakpoint() is intentional, not a checkin error
        breakpoint()
        return wrapped_parse(instring, loc, do_actions, callPreParse)

    # remember what was wrapped so that the break can be removed again
    breaker._originalParseMethod = wrapped_parse  # type: ignore [attr-defined]
    self._parse = breaker  # type: ignore [method-assign]
    return self
def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Replace this expression's parse actions with ``fns``; the actions run when
    the expression has been matched successfully. Returns ``self``.

    Parse actions can perform data conversions, do extra validation, update
    external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable with 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens are passed as ParseResults and may be modified in place, using
    list-style append, extend, and pop operations to update the parsed list
    elements, and dictionary-style item set and del operations to add, update,
    or remove any named results. Tokens modified in place do not need to be
    returned.

    A parse action can also replace the given tokens entirely, by returning
    another ``ParseResults`` object or some entirely different object (common
    for parse actions that perform data conversions). A convenient way to
    build a new parse result is to define the values using a dict, and then
    create the return value using :class:`ParseResults.from_dict`.

    Passing None as the only ``fn`` clears all previously added parse actions
    for this expression.

    Optional keyword arguments:

    - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during
      lookaheads and alternate testing. For parse actions that have side effects, it is
      important to only call the parse action once it is determined that it is being
      called as part of a successful parse. For parse actions that perform additional
      validation, then call_during_try should be passed as True, so that the validation
      code is included in the preliminary "try" parses.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Raises ``TypeError`` if any of ``fns`` is not callable.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None means "remove all parse actions"
    if fns == (None,):
        self.parseAction.clear()
        return self

    if any(not callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")

    self.parseAction[:] = [_trim_arity(fn) for fn in fns]
    # the camelCase keyword is only consulted when call_during_try is absent
    legacy_setting = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_setting)

    return self
def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Append one or more parse actions to this expression's existing list of
    parse actions, and return ``self``. See :class:`set_parse_action` for the
    call signatures and the ``call_during_try`` keyword.

    See examples in :class:`copy`.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction.extend(wrapped)
    # once enabled, call-during-try stays enabled
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get(
            "call_during_try", kwargs.get("callDuringTry", False)
        )
    return self
def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement:
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception; if omitted
      (or ``None``), the default message of :class:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Returns ``self``.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    # Only stringify a message that was actually supplied: str(None) would
    # turn a missing message into the literal text "None" and hide the
    # default failure message.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self
def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
    """
    Define action to perform if parsing fails at this expression.
    Fail action fn is a callable function that takes the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value.  It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns ``self``."""
    self.failAction = fn
    return self
def _skipIgnorables(self, instring: str, loc: int) -> int:
    """
    Advance ``loc`` past everything matched by this expression's ignore
    expressions and return the new location. Each ignore expression is
    applied repeatedly until it fails, and the whole set is retried for as
    long as a pass both matched something and moved the location.
    """
    if not self.ignoreExprs:
        return loc

    skippers = [ignorable._parse for ignorable in self.ignoreExprs]
    previous_loc = loc
    matched_any = True
    while matched_any:
        matched_any = False
        for skip in skippers:
            try:
                while True:
                    loc, _ = skip(instring, loc)
                    matched_any = True
            except ParseException:
                # this ignorable no longer matches here; move on to the next one
                pass
        # check if all ignore exprs matched but didn't actually advance the parse location
        if loc == previous_loc:
            break
        previous_loc = loc
    return loc
def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should start: ``loc`` moved past
    any ignorable expressions and then, if ``skipWhitespace`` is set, past
    any leading characters found in ``whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    # Base implementation: matches nothing, returning the location unchanged
    # together with an empty token list.
    # NOTE(review): presumably overridden by concrete expression classes - confirm.
    return loc, []
def postParse(self, instring, loc, tokenlist):
    # Hook called by _parseNoCache on the tokens returned from parseImpl, before
    # they are wrapped in ParseResults; this base version returns them unchanged.
    return tokenlist
# @profile
def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Match this expression against ``instring`` at ``loc``, without consulting
    the packrat cache.

    Steps: optionally run ``preParse``, call ``parseImpl``, pass the tokens
    through ``postParse``, wrap them in :class:`ParseResults`, then run the
    parse actions (only when ``do_actions`` or ``callDuringTry`` is set).
    When ``self.debug`` is set the configured debug actions are called along
    the way; ``failAction`` is called if the match itself raises.

    Returns ``(new_loc, results)``. An ``IndexError`` raised by ``parseImpl``
    is converted to :class:`ParseException` at the end of the input; an
    ``IndexError`` raised by a parse action is re-raised as a
    :class:`ParseException` chained to it.
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # Pre-set the location reported to debug_fail/failAction: if preParse
        # itself raises, tokens_start would otherwise be unbound in the
        # handler below and an UnboundLocalError would mask the real error.
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # same matching steps as above, without the debug/fail-action handling
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns a new object replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a parse action that returns a new object replaces the results
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens
def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return the location just past it, without
    returning the tokens. Parse actions run only if ``do_actions`` is true.

    A :class:`ParseFatalException` is re-raised as-is when ``raise_fatal`` is
    true; otherwise it is replaced by a :class:`ParseException` built from
    this expression's ``errmsg``.
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
        return outcome[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``:
    ``True`` if :meth:`try_parse` succeeds, ``False`` if it raises
    :class:`ParseException` or ``IndexError``.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
        return True
    except (ParseException, IndexError):
        return False
# cache for left-recursion in Forward references
# - recursion_lock: re-entrant lock kept next to the memo table
# - recursion_memos: memo table; starts as a plain dict, is replaced by
#   enable_left_recursion() and emptied by reset_cache()
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}
class _CacheType(typing.Protocol):
    """
    Structural interface for the objects used for packrat and left-recursion
    caching of results and exceptions.
    """

    # marker compared by identity against the value returned from ``get``
    not_in_cache: bool

    def get(self, *args) -> typing.Any:
        """Look up a cached value."""

    def set(self, *args) -> None:
        """Store a value in the cache."""

    def clear(self) -> None:
        """Discard the cache contents."""
class NullCache(dict):
    """
    Placeholder cache used to initialize the ``packrat_cache`` class variable.

    All three cache operations are no-ops. If/when ``enable_packrat()`` is
    called, this placeholder is replaced by a proper ``_CacheType`` instance.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        """Ignore the arguments; nothing is ever looked up."""

    def set(self, *args) -> None:
        """Ignore the arguments; nothing is ever stored."""

    def clear(self) -> None:
        """Do nothing."""
# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions; starts as a NullCache placeholder and is
# replaced by enable_packrat()
packrat_cache: _CacheType = NullCache()
# lock held by _parseCache while it consults and updates packrat_cache
packrat_cache_lock = RLock()
# [hit count, miss count], updated by _parseCache and zeroed by reset_cache()
packrat_cache_stats = [0, 0]
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Memoizing variant of ``_parseNoCache``; installed as ``_parse`` by
    :class:`enable_packrat`.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``. On a
    miss the real parse is run and either a traceback-free copy of the raised
    :class:`ParseBaseException` or the tuple
    ``(location after match, copy of tokens, loc)`` is stored. On a hit the
    stored exception is re-raised, or a fresh copy of the stored tokens is
    returned. Debug actions are invoked with ``cache_hit=True`` on hits; a
    ``TypeError`` from a debug action that does not accept that keyword is
    ignored.

    :returns: ``(location after match, tokens)``
    :raises ParseBaseException: when the (possibly cached) parse failed
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # stored layout: (end location, tokens copy, start location)
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(tuple[int, ParseResults, int], value)
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # success actions take (instring, start, end, ...); the cached
                    # start is the loc this method was called with
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result
# active parse entry point: the uncached implementation by default;
# enable_packrat() rebinds it to _parseCache and disable_memoization() restores it
_parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero every packrat hit/miss counter in place,
    and empty the left-recursion memo table.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so the shared list object itself is kept
    stats[:] = [0 for _ in stats]
    ParserElement.recursion_memos.clear()
# which memoization mode is active; set by enable_packrat() and
# enable_left_recursion(), both cleared by disable_memoization()
_packratEnabled = False
_left_recursion_enabled = False
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing together with their memoization.

    The caches are reset, both mode flags are cleared and ``_parse`` is bound
    back to ``_parseNoCache``. Calling this when neither mode is active is
    harmless, so it may be used to clear any previous settings before
    activating Packrat or Left Recursion.
    """
    ParserElement.reset_cache()
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False
    ParserElement._parse = ParserElement._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports direct and indirect
    left-recursion. While parsing, left-recursive :class:`Forward` elements
    are matched repeatedly with a fixed recursion depth that is increased
    step by step until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    Matches of ``Forward`` elements are memoized, so parse actions may not be
    re-evaluated during backtracking. Programs whose parse actions depend on a
    strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; ``None`` memoizes all of them.
      Values that are not positive raise ``NotImplementedError``.
    - ``force`` - (default=``False``) - first disable any previously enabled
      memoization instead of raising on a conflict.

    Bounded Recursion parsing is similar but not identical to Packrat parsing,
    so the two cannot be combined: without ``force=True`` a ``RuntimeError``
    is raised when Packrat is already enabled.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError(f"Memo size of {cache_size_limit}")

    ParserElement.recursion_memos = memo  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (common in complex
    grammars) can return a cached value immediately instead of re-running the
    parsing/validating code. Both successful results and parsing exceptions
    are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the size
      of the packrat cache; ``None`` makes the cache unbounded; ``0``
      effectively disables the cache.
    - ``force`` - (default= ``False``) - first disable any previously enabled
      memoization instead of raising on a conflict.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is first
    imported. For best results, call ``enable_packrat()`` immediately after
    importing pyparsing. Calling it while packrat is already enabled changes
    nothing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar but not identical to Bounded Recursion parsing,
    so the two cannot be combined: without ``force=True`` a ``RuntimeError``
    is raised when Bounded Recursion is already enabled.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if not ParserElement._packratEnabled:
        ParserElement._packratEnabled = True
        unbounded = cache_size_limit is None
        ParserElement.packrat_cache = (
            _UnboundedCache() if unbounded else _FifoCache(cache_size_limit)
        )
        ParserElement._parse = ParserElement._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` from its beginning according to this parser definition.
    This is intended as the primary interface for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    Setting ``parse_all`` is equivalent to ending the grammar with a :class:`StringEnd` expression.

    So that column numbers are reported properly, parsing is done on a copy of the input in which tabs are
    expanded to spaces (8 per tab, the ``str.expandtabs`` default). When the input contains tabs and parse
    actions index into the string with their ``loc`` argument, keep a consistent view of the input by:

    - calling ``parse_with_tabs`` on the grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - defining the parse action with the full ``(s,loc,toks)`` signature and indexing the ``s`` argument, or
    - expanding the tabs in the input string yourself before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    An exception is raised if ``parse_all`` is set and ``instring`` does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    require_full_match = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if require_full_match:
            # skip trailing ignorables, then insist on end of input
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clearing out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
    return tokens
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match is yielded as
    the matching tokens, start location, and end location. The optional
    ``max_matches`` argument clips scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    :param instring: the string to scan
    :param max_matches: maximum number of matches to yield
    :param overlap: report overlapping matches
    :param always_skip_whitespace: if true, whitespace and ignorables are skipped
        before each attempt using this expression's ``whiteChars`` and
        ``ignoreExprs``; otherwise this expression's own ``preParse`` is used
    :param debug: print a dict with the tokens, start and end of each match
    :param maxMatches: pre-PEP8 name for ``max_matches``; the smaller of the two is used

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    # same PEP8-named call that parse_string makes
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text with the
    (possibly modified) tokens returned from a parse action. Define a grammar,
    attach a parse action that modifies the returned token list, then call
    ``transform_string()`` on a target string: every match is replaced by its
    tokens, unmatched text is copied through, and the resulting string is
    returned.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    copied_upto = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy the unmatched text lying between the previous match and this one
            if start > copied_upto:
                pieces.append(instring[copied_upto:start])
            copied_upto = end

            if toks:
                if isinstance(toks, ParseResults):
                    pieces += toks.as_list()
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)

        pieces.append(instring[copied_upto:])
        kept = [piece for piece in pieces if piece]
        return "".join([str(item) for item in _flatten(kept)])
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens of
    every match of this expression into a single :class:`ParseResults`.
    The optional ``max_matches`` argument clips searching after 'n' matches
    are found (``maxMatches`` is the pre-PEP8 spelling; the smaller wins).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = []
        for tokens, _start, _end in self.scan_string(
            instring, limit, always_skip_whitespace=False, debug=debug
        ):
            found.append(tokens)
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits ``instring`` wherever this expression matches.

    :param instring: the string to split
    :param maxsplit: upper bound on the number of splits, passed to
        :class:`scan_string` as ``max_matches``
    :param include_separators: (default= ``False``) also yield the first token
        of each separator match between the pieces
    :param includeSeparators: pre-PEP8 spelling of ``include_separators``

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for tokens, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield tokens[0]
        cursor = end
    # whatever follows the final separator (possibly the empty string)
    yield instring[cursor:]
def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. A string operand
    is first converted using the configured literal string class
    (:class:`Literal` by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented
def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` yields ``SkipTo(expr)("_skipped*") + expr``; a string left
    operand is converted with the literal string class before being added.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented
def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string operand is converted with the literal string class; any other
    non-:class:`ParserElement` operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented
def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted with the literal string class and the
    subtraction is then delegated to it.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented
def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` (or ``...``) as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    :raises ValueError: for a negative count, or a maximum below the minimum
    """
    # spell out the leading-Ellipsis shorthands as explicit tuples
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        min_count, extra_count = other, 0
    elif isinstance(other, tuple):
        bounds = tuple(None if item is Ellipsis else item for item in other)
        # pad to two entries and ignore anything past the second
        low, high = (bounds + (None, None))[:2]
        if low is None:
            low = 0
        if not isinstance(low, int):
            return NotImplemented
        if high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        if not isinstance(high, int):
            return NotImplemented
        min_count, extra_count = low, high - low
    else:
        return NotImplemented

    if min_count < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if extra_count < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if min_count == 0 and extra_count == 0:
        return And([])

    if not extra_count:
        return self if min_count == 1 else And([self] * min_count)

    def nested_optional(depth):
        # Opt(self + Opt(self + ... Opt(self))), nested `depth` levels deep
        if depth > 1:
            return Opt(self + nested_optional(depth - 1))
        return Opt(self)

    if not min_count:
        return nested_optional(extra_count)
    required = self if min_count == 1 else And([self] * min_count)
    return required + nested_optional(extra_count)
def __rmul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator when the left operand is not a
    :class:`ParserElement`; forwards to ``__mul__`` with the same operand.
    """
    product = self.__mul__(other)
    return product
def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    Special operands: ``...`` produces a pending skip that must skip, and the
    empty string makes this expression optional (``expr | ""`` is ``Opt(expr)``).
    Any other string is converted with the literal string class.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    rhs = other
    if isinstance(rhs, str_type):
        if rhs == "":
            return Opt(self)
        rhs = self._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])
    return NotImplemented
def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted with the literal string class and the
    alternation is then delegated to it.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented
def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`

    A string operand is converted with the literal string class; any other
    non-:class:`ParserElement` operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented
def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted with the literal string class and the
    operation is then delegated to it.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented
def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`

    A string operand is converted with the literal string class; any other
    non-:class:`ParserElement` operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented
def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted with the literal string class and the
    operation is then delegated to it.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented
def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated
# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence; with __iter__ set to None the expression is not
# iterable even though it defines __getitem__
__iter__ = None
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` instances of ``expr`` exist in the input stream. If this
    behavior is desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    :raises TypeError: if more than two index arguments are given
    """
    has_stop_on = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[count: end_expr]
        key, stop_on = key.start, key.stop
        has_stop_on = True
        if key is None:
            key = ...
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max: end_expr] - the slice carries the max and the stop expression
        bound = key[1]
        key, stop_on = (key[0], bound.start), bound.stop
        has_stop_on = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    n_args = len(key)
    if n_args > 2:
        overflow = f"... [{n_args}]" if n_args > 5 else ""
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{overflow})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop_on:
        repeated.stopOn(stop_on)

    return repeated
def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    Calling with no ``name`` (or ``None``) is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)
def suppress(self) -> ParserElement:
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that its
    output is suppressed; useful to keep punctuation from cluttering up
    returned output.
    """
    wrapped = Suppress(self)
    return wrapped
def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern, and return ``self``.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any).
        This implementation does not read the flag; it only sets ``skipWhitespace`` on this element.
    """
    setattr(self, "skipWhitespace", True)
    return self
def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern, and return ``self``. This is
    normally only used internally by the pyparsing module, but may be needed
    in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any).
        This implementation does not read the flag; it only clears ``skipWhitespace`` on this element.
    """
    setattr(self, "skipWhitespace", False)
    return self
def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars

    Whitespace skipping is switched on, ``chars`` is stored as a new ``set``
    and ``copy_defaults`` is recorded in ``copyDefaultWhiteChars``.

    :returns: ``self``
    """
    whitespace = set(chars)
    self.skipWhitespace = True
    self.whiteChars = whitespace
    self.copyDefaultWhiteChars = copy_defaults
    return self
def parse_with_tabs(self) -> ParserElement:
    """
    Keep ``<TAB>`` characters in the input string, overriding the default
    behavior of expanding them to spaces before parsing.
    Must be called before ``parse_string`` when the input grammar contains
    elements that match ``<TAB>`` characters.

    :returns: ``self``
    """
    setattr(self, "keepTabs", True)
    return self
def ignore(self, other: ParserElement) -> ParserElement:
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in :class:`Suppress`. A :class:`Suppress` expression is
    added only if it is not already registered; any other expression is copied,
    wrapped in :class:`Suppress` and always added.

    Example::

        patt = Word(alphas)[...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    ignorable = Suppress(other) if isinstance(other, str_type) else other

    if not isinstance(ignorable, Suppress):
        self.ignoreExprs.append(Suppress(ignorable.copy()))
    elif ignorable not in self.ignoreExprs:
        self.ignoreExprs.append(ignorable)
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching, and
    turn debugging on for this expression. A falsy action is replaced by the
    corresponding default debug action.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

    :returns: ``self``
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_failure = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_failure)
    self.debug = True
    return self
def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if recurse:
        # apply the flag to every expression reported by visit_all()
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
    elif not flag:
        self.debug = False
    else:
        # enabling installs the default debug actions, which also sets self.debug
        self.set_debug_actions(
            _default_start_debug_action,
            _default_success_debug_action,
            _default_exception_debug_action,
        )
    return self
@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression. It is computed by
    ``_generateDefaultName()`` on first access and cached in ``_defaultName``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Return the auto-generated name for this expression.

    Child classes must define this method, which defines how the ``default_name`` is set.
    The ``default_name`` property calls it when no cached value is present and
    stores the result in ``_defaultName``.
    """
def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a custom name to this expression, which makes debugging and exception
    messages clearer. If `__diag__.enable_debug_on_named_expressions` is set to True,
    setting a name will also enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Returns this expression, so that calls can be chained.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name  # type: ignore[assignment]
    # rebuild the error message from the (possibly new) display name
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        has_custom_name = name is not None
        self.set_debug(has_custom_name)

    return self
@property
def name(self) -> str:
    """Display name: the user-defined name if one was set, otherwise the auto-generated default name."""
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

@name.setter
def name(self, new_name) -> None:
    # assigning to ``name`` is equivalent to calling set_name()
    self.set_name(new_name)
def __str__(self) -> str:
    """Return this expression's display name (see the ``name`` property)."""
    return self.name
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)
def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default name,
    so that ``default_name`` regenerates it on next access. Returns this expression.
    """
    self.streamlined = True
    self._defaultName = None
    return self
def recurse(self) -> list[ParserElement]:
    """
    Return the expressions that ``_checkRecursion`` should descend into.
    This base implementation returns an empty list.
    """
    return []
def _checkRecursion(self, parseElementList):
    """
    Walk the expressions returned by ``recurse()``, passing each one a copy of
    ``parseElementList`` with this expression appended.
    """
    # build a new list so that the caller's list is not modified
    subRecCheckList = parseElementList[:] + [self]
    for e in self.recurse():
        e._checkRecursion(subRecCheckList)
def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: every call emits a ``DeprecationWarning``. ``validateTrace`` is
    accepted but not used.
    """
    deprecation_msg = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, or a file name.
    When a file name is given, the file is opened using ``encoding``, read in full,
    and closed before parsing starts. ``parse_all`` (or its synonym ``parseAll``)
    is passed on to ``parse_string``.
    """
    must_parse_all = parseAll or parse_all

    try:
        # first assume a file-like object...
        reader = typing.cast(TextIO, file_or_filename)
        text = reader.read()
    except AttributeError:
        # ...and if it has no read(), treat the argument as a file name
        filename = typing.cast(str, file_or_filename)
        with open(filename, "r", encoding=encoding) as f:
            text = f.read()

    try:
        return self.parse_string(text, must_parse_all)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object compares equal;
    - a string (any ``str_type``) compares equal if this expression matches all of it;
    - another ParserElement compares equal if both have equal instance attributes;
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
def __hash__(self):
    """Hash by object identity."""
    # defined explicitly: a class that defines __eq__ without __hash__ becomes unhashable
    return id(self)
def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of whether this parser accepts a test string. Handy for simple
    inline microtests of sub-expressions while building up a larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if it raises a ``ParseBaseException``.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # both spellings default to True, so either one can turn the flag off
    must_parse_all = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=must_parse_all)
    except ParseBaseException:
        return False
    return True
def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only parameters (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are synonyms for the corresponding snake_case parameters.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` tuples, one per test, where ``result`` is the
    :class:`ParseResults` of a successful parse or the exception that was raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each snake_case argument with its camelCase synonym; flags that
    # default to True are and-ed, those that default to False/None are or-ed
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a single string of tests is split into individually stripped lines
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            # a comment marker given as a string is wrapped in a Literal
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # NL rewrites the two-character sequence backslash-n as a real newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # collect comment lines, plus blank lines that follow a collected comment,
        # to be shown ahead of the next test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # a parse failure only counts as success when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame after the matching call site is reported as the parse action
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            # a successful parse only counts as success when failures are not expected
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    # a failing post-parse callback is reported in the output, not raised
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    diagram = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a file name: create/overwrite the file as UTF-8
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(diagram, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(diagram, embed=embed, **kwargs))
# Compatibility synonyms
# camelCase aliases for the snake_case methods; each one is produced by
# replaced_by_pep8(<camelCase name>, <snake_case implementation>), and the
# aliases for static methods are re-wrapped in staticmethod().
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# defaultName is a direct alias of the default_name property (no wrapper)
defaultName = default_name
# fmt: on
class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        # anchor: the expression that the '...' was attached to
        self.anchor = expr
        # must_skip: selects the alternative-building branch in __add__
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render as "<anchor> ..." by naming anchor + Empty() and swapping in "..."
        return str(self.anchor + Empty()).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # the expression added after '...' becomes the SkipTo target; the skipped
        # text is collected under the results name "_skipped"
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if self.must_skip:

            def must_skip(t):
                # nothing was actually skipped: drop the empty token and the results name
                if not t._skipped or t._skipped.as_list() == [""]:
                    del t[0]
                    t.pop("_skipped", None)

            def show_skip(t):
                # the last skipped entry is empty: replace "_skipped" with a
                # note that the anchor expression was missing
                if t._skipped.as_list()[-1:] == [""]:
                    t.pop("_skipped")
                    t["_skipped"] = f"missing <{self.anchor!r}>"

            # '+' binds tighter than '|': either (anchor + skipper) or a bare
            # skipper, in both cases followed by other; skipper() makes a copy
            # so that each alternative gets its own parse action
            return (
                self.anchor + skipper().add_parse_action(must_skip)
                | skipper().add_parse_action(show_skip)
            ) + other

        return self.anchor + skipper + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never combined with a following expression cannot parse
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # default name is the name of the concrete class
        return type(self).__name__
class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # always fails, at the current location
        raise ParseException(instring, loc, self.errmsg, self)
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Subclasses are always created as themselves
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: for a plain Literal, select a subclass with an
        # optimized parseImpl based on the length of the string to match
        text = matchString or match_string
        if not text:
            return super().__new__(Empty)
        if len(text) == 1:
            return super().__new__(_SingleCharLiteral)
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        # first character (or "" for an empty string), used as a quick pre-check
        self.firstMatchChar = text[:1]
        # self.name is derived from self.match, so this must follow the assignments above
        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # cheap single-character comparison first, full comparison only if it passes
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString=""):
        # both arguments are accepted for signature compatibility with Literal,
        # but ignored: the literal to match is always the empty string
        super().__init__("")
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # matches without consuming input and without producing tokens
        return loc, []
class _SingleCharLiteral(Literal):
    # Literal specialization selected by Literal.__new__ for one-character
    # match strings: a single character comparison is all that is needed
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] == self.firstMatchChar:
            return loc + 1, self.match
        raise ParseException(instring, loc, self.errmsg, self)
# Set here because Literal is only defined at this point, after ParserElement.
# NOTE(review): presumably the class used to wrap plain strings that are
# combined with expressions - confirm against the uses of _literalStringClass.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Raises ``ValueError`` if the keyword string is empty.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done in upper case
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that it is neither immediately
        preceded nor immediately followed by a keyword character.

        Returns ``(loc + matchLen, match)`` on success. Raises ``ParseException``
        otherwise; when the text matched but a boundary check failed, the message
        says which one, and the error location points at the offending character.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    # (message now worded the same as in the case-sensitive branch)
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        defining_text = matchString or match_string
        # the inherited ``match`` attribute holds the upper-cased form for comparisons
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        candidate = instring[loc : loc + self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString
class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # resolve the snake_case / camelCase synonyms, then defer to Keyword
        # with case-insensitive matching switched on
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # an explicit max_mismatches wins over its camelCase synonym
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the input at ``loc`` with ``match_string`` character by character.

        Returns ``(end_loc, results)`` if the remaining input is at least as long
        as ``match_string`` and no more than ``maxMismatches`` characters differ;
        raises ``ParseException`` at ``loc`` otherwise.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # there must be enough input left to hold the whole match string
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches
            caseless = self.caseless

            for pos, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(pos)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # the loop ran to completion, so exactly len(match_string)
                # characters were compared and the match ends at maxloc; using
                # maxloc (not last index + 1) keeps an empty match string from
                # consuming one unrelated input character
                results = ParseResults([instring[start:maxloc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return maxloc, results

        raise ParseException(instring, loc, self.errmsg, self)
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums)  # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # the camelCase keyword spellings win over the snake_case ones when given
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        # strip any excluded characters from both the leading and body sets
        leading_set = set(initChars)
        if excludeChars:
            excluded_set = set(excludeChars)
            leading_set -= excluded_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excluded_set)
        self.initChars = leading_set
        self.initCharsOrig = "".join(sorted(leading_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set: body shares the very same set as the leading chars
            self.bodyChars = leading_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides whatever min/max were given
        if exact > 0:
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word; not attempted when a
        # space is among the matchable characters
        if " " in (self.initChars | self.bodyChars):
            return

        if len(self.initChars) == 1:
            re_leading_fragment = re.escape(self.initCharsOrig)
        else:
            re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

        if self.bodyChars == self.initChars:
            # one character class repeated for the whole word
            if max == 0 and self.minLen == 1:
                repeat = "+"
            elif max == 1:
                repeat = ""
            elif self.minLen != self.maxLen:
                upper_bound = "" if self.maxLen == _MAX_INT else self.maxLen
                repeat = f"{{{self.minLen},{upper_bound}}}"
            else:
                repeat = f"{{{self.minLen}}}"
            self.reString = f"{re_leading_fragment}{repeat}"
        else:
            # leading class matches one char; body repeat counts are reduced by one
            if max == 1:
                re_body_fragment = ""
                repeat = ""
            else:
                re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                if max == 0 and self.minLen == 1:
                    repeat = "*"
                elif max == 2:
                    repeat = "?" if min <= 1 else ""
                else:
                    body_min = min - 1 if min > 0 else ""
                    if min != max:
                        body_max = max - 1 if max > 0 else ""
                        repeat = f"{{{body_min},{body_max}}}"
                    else:
                        repeat = f"{{{body_min}}}"

            self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

        if self.asKeyword:
            self.reString = rf"\b{self.reString}\b"

        try:
            self.re = re.compile(self.reString)
        except re.error:
            # fall back to the character-by-character parseImpl
            self.re = None  # type: ignore[assignment]
        else:
            self.re_match = self.re.match
            self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def _generateDefaultName(self) -> str:
        """Build a compact ``W:(...)`` name from the character sets and length limits."""

        def charsAsStr(s):
            max_repr_len = 16
            collapsed = _collapse_string_to_ranges(s, re_escape=False)
            if len(collapsed) <= max_repr_len:
                return collapsed
            return collapsed[: max_repr_len - 3] + "..."

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen <= 1 and self.maxLen == _MAX_INT:
            return base
        if self.minLen == self.maxLen:
            # a single-character word drops the "W:" prefix
            if self.minLen == 1:
                return base[2:]
            return base + f"{{{self.minLen}}}"
        if self.maxLen == _MAX_INT:
            return base + f"{{{self.minLen},...}}"
        return base + f"{{{self.minLen},{self.maxLen}}}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match character by character; used when no regex was installed."""
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        too_short = loc - start < self.minLen
        body_char_follows = loc < instrlen and instring[loc] in body_chars
        body_char_precedes = start > 0 and instring[start - 1] in body_chars

        if (
            too_short
            or (self.maxSpecified and body_char_follows)
            or (self.asKeyword and (body_char_precedes or body_char_follows))
        ):
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match using the compiled regex built in ``__init__``."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.group()
class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # camelCase keyword spellings take precedence over the snake_case ones
        keyword_flag = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, as_keyword=keyword_flag, exclude_chars=excluded
        )
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may be a non-empty string, an already compiled RE object
        (anything with ``pattern`` and ``match`` attributes), or a callable
        taking no arguments that returns either of those.

        Raises ``ValueError`` for an empty string and ``TypeError`` for any
        other unsupported ``pattern`` type.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred to the first access of ``self.re``
            self._re = None
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled RE (stdlib ``re`` or a compatible module)
            self._re = pattern
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # as_match takes precedence when both result modes are requested
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    @cached_property
    def re(self) -> re.Pattern:
        """The compiled pattern, built on first access.

        Raises ``ValueError`` (chained to the underlying ``re.error``) if the
        pattern string cannot be compiled.
        """
        if self._re:
            return self._re

        if callable(self.pattern):
            # replace self.pattern with the string returned by calling self.pattern()
            self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, we're done
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
                return self._re

        try:
            self._re = re.compile(self.pattern, self.flags)
        except re.error as err:
            # keep the original compile error attached as the cause
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err
        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self) -> bool:  # type: ignore[override]
        """True if the pattern matches the empty string."""
        return self.re_match("", 0) is not None

    def _generateDefaultName(self) -> str:
        # collapse the doubled backslashes that repr() introduces
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the matched text, with named groups as named results."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Return the tuple of regex groups instead of the matched text."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        return result.end(), result.groups()

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Return the match object itself instead of the matched text."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        return result.end(), result

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or when
        ``repl`` is a callable and the expression uses ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:
            # tokens[0] is the match object, so expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:
            # tokens[0] is the matched text, so re-apply the pattern to it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace spellings and the characters they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        super().__init__()
        # merge the camelCase keyword spellings with the snake_case ones;
        # the True-by-default flags are combined with ``and`` so that either
        # spelling can switch them off
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of a multi-character end quote, as long
            # as it is not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally stop at line breaks
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # NOTE: this segment must NOT be an f-string - the {3}, {2}
                    # and {4} are regex repeat counts, and inside an f-string
                    # they would be rendered as the literal digits 3, 2 and 4
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as err:
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to Regex"
            ) from err

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``; unquote and unescape it if configured."""
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            instring[loc] == self.first_quote_char
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        def convert_escaped_numerics(s: str) -> str:
            # ``s`` is the escape sequence with its leading backslash removed
            if s == "0":
                return "\0"
            if s.isdigit() and len(s) == 3:
                return chr(int(s, base=8))
            elif s.startswith(("u", "x")):
                return chr(int(s[1:], base=16))
            else:
                return s

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        # whitespace is part of the match unless explicitly excluded
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        # an exact length, when given, overrides both bounds
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        # long exclusion sets are abbreviated
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when generating the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # only skip those known whitespace characters that are NOT being matched
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        # self.leave_whitespace()
        self.mayReturnEmpty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length, when given, overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        # characters without an entry in whiteStrs (e.g. "\v") fall back to a
        # code-point tag instead of raising KeyError
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class PositionToken(Token):
    """Base class for the position-checking tokens defined below it;
    marks the expression as one that may return empty and that is not
    expected to raise ``IndexError``.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorables, then whitespace, until the target column is reached."""
        target_col = self.col
        if col(loc, instring) != target_col:
            if self.ignoreExprs:
                loc = self._skipIgnorables(instring, loc)

            end = len(instring)
            while (
                loc < end
                and instring[loc].isspace()
                and col(loc, instring) != target_col
            ):
                loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column."""
        current_col = col(loc, instring)
        if current_col > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        # advance by the number of columns still missing
        newloc = loc + self.col - current_col
        return newloc, instring[loc:newloc]
class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the full whitespace set before newline is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)

        if "\n" not in self.orig_whiteChars:
            return pos

        # newline counted as whitespace originally: step over each newline
        # and any non-newline whitespace that follows it
        while instring[pos : pos + 1] == "\n":
            pos = self.skipper.preParse(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # newline must not be skipped as whitespace, since it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        instrlen = len(instring)

        # end of the input also counts as an end of line
        if loc == instrlen:
            return loc + 1, []

        if loc < instrlen and instring[loc] == "\n":
            return loc + 1, "\n"

        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeed at position 0, or when everything up to here is just
        # whitespace and ignorables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)
class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed only at or beyond the end of ``instring``.

        Raises :class:`ParseException` when unparsed text remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)

        # exactly at the end: step one past it; already past the end: stay put
        return (loc + 1 if loc == instrlen else loc), []
class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase keyword wins when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed at position 0, or where a word character follows a non-word one.

        Raises :class:`ParseException` otherwise, including at or past the
        end of the input, where no word can start.
        """
        if loc != 0:
            if (
                # explicit bounds check instead of relying on an IndexError
                loc >= len(instring)
                or instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase keyword wins when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed at the end of the input, or where a non-word character
        follows a word character.

        Raises :class:`ParseException` otherwise.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                # at position 0 nothing precedes; without this guard
                # instring[loc - 1] would wrap around to the LAST character
                loc == 0
                or instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to ``True``.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True
    """

    def __init__(self, tag_name: str, value: Any = True):
        super().__init__()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.leave_whitespace()
        self.tag_name = tag_name
        self.tag_value = value
        # the tag is attached to the results through a parse action
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.tag_name}={self.tag_value!r}"
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.

    Holds an ordered list of contained expressions in ``self.exprs``.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Normalize ``exprs`` into the list ``self.exprs``.

        ``exprs`` may be a single string, a single :class:`ParserElement`,
        or an iterable of either; strings are wrapped using
        ``self._literalStringClass``.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any bare strings in the sequence; other items pass through
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all - treat it as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so that the change does not affect the originals
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so that the change does not affect the originals
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Extends ``ignore`` defined in base class, and also passes the newly
        registered ignore expression on to all contained expressions. A
        :class:`Suppress` that is already in ``self.ignoreExprs`` is skipped.
        """
        if not isinstance(other, Suppress) or other not in self.ignoreExprs:
            super().ignore(other)
            # propagate the entry at the end of ignoreExprs after the base call
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline contained expressions and flatten nested expressions
        of the same class. Does nothing if already streamlined.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, then validates the
        contained expressions and checks this expression for recursion.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this expression whose contained expressions are
        also copies.
        """
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # warn (when that diagnostic is enabled and not suppressed) if a
        # contained expression already has a results name of its own
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, list_all_matches)

        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element: once parseImpl passes one of these, failures in
        # the following expressions are raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence from ``exprs_arg``.

        Each ``...`` (Ellipsis) item is replaced by a ``SkipTo`` for the
        expression that follows it, with results name ``"_skipped*"``.
        Raises ``Exception`` if the sequence ends in ``...``.
        """
        exprs: list[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            tmp: list[ParserElement] = []
            for i, expr in enumerate(exprs):
                if expr is not Ellipsis:
                    tmp.append(expr)
                    continue

                if i < len(exprs) - 1:
                    # adding to an Empty() yields a ParseExpression whose last
                    # element is the following item as a ParserElement
                    skipto_arg: ParserElement = typing.cast(
                        ParseExpression, (Empty() + exprs[i + 1])
                    ).exprs[-1]
                    tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    continue

                # an Ellipsis in last position has nothing to skip to
                raise Exception("cannot construct And with sequence ending in ...")
            exprs[:] = tmp
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling is taken from the first expression,
            # unless that expression is a White
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Resolve pending skips, run the base-class streamlining, and link
        any :class:`IndentedBlock` to the expression that precedes it.
        """
        # collapse any _PendingSkip's
        if self.exprs and any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in self.exprs[:-1]
        ):
            # unique placeholder, compared by identity, for slots whose
            # expression has been merged into the preceding one
            deleted_expr_marker = NoMatch()
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    # combine the pending skip with the next expression,
                    # then mark that expression's slot for removal
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # when prev matches, store its column on the block as
                    # parent_anchor; cur_ is bound as a default argument so
                    # the lambda keeps this block, not a later value of cur
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse each contained expression in order, accumulating results.

        Returns ``(loc, resultlist)``. After an ``_ErrorStop`` has been
        passed, a failure of a later expression is raised as
        :class:`ParseSyntaxException`.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # exact type check (not isinstance): only a plain _ErrorStop counts
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    # convert to a ParseSyntaxException, dropping the traceback
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support ``expr += other``: append ``other`` to this And in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the slice picks just the first and last characters of inner)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if self.exprs:
            # may return empty if any alternative can; skips whitespace
            # only if every alternative does
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self.mayReturnEmpty = True

    def streamline(self) -> ParserElement:
        """Streamline, then recompute the flags derived from the alternatives."""
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            # a White alternative turns off whitespace skipping
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        Each alternative is first tried with ``try_parse`` to find how far
        it matches; the matches are then re-parsed, longest first. Raises a
        :class:`ParseFatalException` if any alternative raised one and no
        match is returned, otherwise the :class:`ParseException` with the
        greatest location.
        """
        # track the failure that got furthest into the input
        maxExcLoc = -1
        maxException = None
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # discard any non-fatal failure recorded so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # non-fatal failures are not recorded once a fatal one was seen
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                # record an IndexError as a failure at the end of the string
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # best re-parse result so far that came up short of its first-pass length
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    # matched at least as much as in the first pass
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie for first place, re-sort
                # so the element with the longer string form comes first
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            if maxExcLoc == loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Support ``expr ^= other``: append ``other`` to this Or in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # warn (when that diagnostic is enabled and not suppressed) if any
        # alternative is an And
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            # with no alternatives there is nothing to derive flags from
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
        self.skipWhitespace = all(alt.skipWhitespace for alt in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute flags derived from the alternatives."""
        if self.streamlined:
            return self

        super().streamline()

        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(alt.saveAsList for alt in self.exprs)
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
        # a White alternative turns off whitespace skipping
        self.skipWhitespace = all(
            alt.skipWhitespace and not isinstance(alt, White) for alt in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        If none does, re-raise the failure that got furthest into the
        input. A :class:`ParseFatalException` propagates immediately.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as fatal:
                fatal.__traceback__ = None
                fatal.parser_element = alt
                raise
            except ParseException as failure:
                if failure.loc > furthest_loc:
                    furthest_exc, furthest_loc = failure, failure.loc
            except IndexError:
                # record an IndexError as a failure at the end of the string
                end_loc = len(instring)
                if end_loc > furthest_loc:
                    furthest_exc = ParseException(instring, end_loc, alt.errmsg, self)
                    furthest_loc = end_loc

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # every alternative failed right at the current position, so report
        # the collective error message instead of any individual one
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Support ``expr |= other``: append ``other`` to this MatchFirst in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        alternatives = " | ".join(str(alt) for alt in self.exprs)
        return "{" + alternatives + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # warn (when that diagnostic is enabled and not suppressed) if any
        # alternative is an And
        diag_enabled = (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        )
        if diag_enabled and any(
            isinstance(alt, And)
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in alt.suppress_warnings_
            for alt in self.exprs
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        # the expression groups are built lazily, on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Support ``expr &= other``: append ``other`` to this Each in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Each([self, other])

    def streamline(self) -> ParserElement:
        """Streamline, then recompute ``mayReturnEmpty`` from the contents."""
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expressions in whatever order they occur.

        A first phase uses ``try_parse`` to find the order in which the
        expressions match; a second phase parses them in that order and
        combines the results. Raises :class:`ParseException` if a required
        expression is never matched, or the selected
        :class:`ParseFatalException` if any were raised in the final round.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            # maps id() of each Opt's wrapped expression back to the Opt
            self.opt1map = dict(
                (id(e.expr), e) for e in self.exprs if isinstance(e, Opt)
            )
            # expressions wrapped by an Opt
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            # other expressions that may match empty
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            # inner expressions of repetition elements, given the wrapper's
            # results name with list_all_matches=True
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        # work on copies so that the cached groups are left unchanged
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        multis = self.multioptionals[:]
        matchOrder: list[ParserElement] = []

        keepMatching = True
        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        # repeat rounds until a round in which every candidate fails
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + multis
            failed.clear()
            fatals.clear()
            for e in tmpExprs:
                try:
                    tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the owning Opt, if there is one, else e itself
                    matchOrder.append(self.opt1map.get(id(e), e))
                    # required and optional expressions are matched once;
                    # the multis are never removed
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie for first place, re-sort
                # so the element with the longer string form comes first
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if tmpReqd:
            missing = ", ".join([str(e) for e in tmpReqd])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt]

        # second phase: parse in the discovered order, from the original loc
        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return f"{{{' & '.join(str(e) for e in self.exprs)}}}"
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.

    Wraps a single contained expression, held in ``self.expr``.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to a parser element
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # take on the parsing characteristics of the wrapped expression
        self.mayIndexError = expr.mayIndexError
        self.mayReturnEmpty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse with the wrapped expression.

        Raises :class:`ParseException` if no expression is defined. Any
        :class:`ParseBaseException` other than a
        :class:`ParseSyntaxException` has its empty fields filled in before
        being re-raised.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as exc:
            # fill in whatever the exception is missing
            if not exc.pstr:
                exc.pstr = instring
            if not exc.loc:
                exc.loc = loc
            if not exc.parser_element:
                exc.parser_element = self
            # an explicitly named non-Forward element reports its own message
            use_own_msg = (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            )
            if use_own_msg:
                exc.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and when
        ``recursive`` also invokes it on a copy of the wrapped expression.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and when
        ``recursive`` also invokes it on a copy of the wrapped expression.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Extends ``ignore`` defined in base class, and also passes the newly
        registered ignore expression on to the wrapped expression. A
        :class:`Suppress` that is already in ``self.ignoreExprs`` is skipped.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and the wrapped expression, if any."""
        super().streamline()
        wrapped = self.expr
        if wrapped is not None:
            wrapped.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        # meeting ourselves again on the current path means recursion
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        if self.expr is None:
            return
        path = parseElementList[:] + [self]
        self.expr._checkRecursion(path)

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, then validates the
        wrapped expression and checks this element for recursion.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        tmp = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        # zero-width check that the current column equals ref_col
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # zero-width check that the current column is greater than ref_col
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._grouped = grouped
        self._recursive = recursive
        # column to which the text is expected to return after the block
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match repeated occurrences of the expression, all starting at the
        column of the first non-whitespace character at or after ``loc``.
        """
        # an Empty() is used to advance past leading whitespace; the column
        # reached is the indent for all lines of this block
        start = Empty().preParse(instring, loc)

        # fail fast: if the expression does not match here, this raises
        # and nothing further needs to be built
        self.expr.try_parse(instring, start, do_actions=do_actions)

        block_col = col(start, instring)
        same_indent = self._Indent(block_col)
        line_expr = Empty() + same_indent + self.expr

        if self._recursive:
            deeper_indent = self._IndentGreater(block_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = block_col
            line_expr += Opt(deeper_indent + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{block_col}")
        repeated = OneOrMore(line_expr)

        undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(repeated) if self._grouped else repeated
        return (body + Optional(undent)).parseImpl(instring, start, do_actions)
class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # only location 0 counts as the start of the string
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")
class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # column 1 is the first column of a line
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.

    ``FollowedBy`` verifies that the expression matches at the current
    position but does *not* advance the parsing position. The returned
    token list is always empty; results names defined inside the
    lookahead expression are still returned for access by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the lookahead expression at ``loc`` and return the
        unchanged ``loc`` with the listed tokens removed."""
        # Parse for real, then empty the list portion of the results in place:
        # that way any named results defined in the lookahead are retained.
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)
        peeked = lookahead[1]
        del peeked[:]

        # the location handed in is returned as-is: nothing is consumed
        return loc, peeked
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.

    ``PrecededBy`` verifies that the expression matches immediately before
    the current position; it does not advance the parsing position. The
    returned token list is always empty, but a results name defined on the
    given expression is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to look behind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0):
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False

        # Work out whether the lookbehind width can be taken from the
        # expression itself, in which case the caller's ``retreat`` is replaced.
        width_is_known = True
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            width_is_known = False

        self.exact = width_is_known
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the token list of whatever was matched
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Check that the expression matches just before ``loc``.

        Returns ``loc`` unchanged together with the lookbehind results;
        raises a parse exception when no lookbehind match is found.
        """
        if self.exact:
            # single attempt, starting exactly ``retreat`` characters back
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, matched = self.expr._parse(instring, loc - self.retreat)
            return loc, matched

        # ``retreat`` is only an upper bound on the window: try successively
        # longer tails of the window, each of which must run up to its end
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        failure: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, matched = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                failure = pbe
                continue
            return loc, matched

        # nothing matched: report the most recent failure
        raise failure
class Located(ParseElementEnhance):
    """
    Wraps the tokens of an expression together with the locations in the
    input string where the match started and ended.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression at ``loc`` and return
        ``[start, tokens, end]`` carrying the three results names."""
        begin = loc
        end, matched = self.expr._parse(instring, begin, do_actions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end

        # with a results name set, return as a list so that the name is
        # attached to the complete group
        return end, ([located] if self.resultsName else located)
class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.

    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position. Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list. May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # Set the flag directly instead of calling self.leave_whitespace():
        # the setting must not be propagated to the contained expression.
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return ``loc`` with an empty token list unless the wrapped
        expression can parse at ``loc``, in which case raise
        :class:`ParseException`."""
        unwanted_found = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if not unwanted_found:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression in braces, prefixed by ``~``."""
        wrapped = f"{self.expr}"
        return "~{" + wrapped + "}"
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching of one expression, with
    an optional "stop on" sentinel expression that ends the repetition."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        # ``stopOn`` is the camelCase keyword form of ``stop_on``
        sentinel = stopOn or stop_on
        self.saveAsList = True
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel expression; stored
        negated as ``self.not_ender``. Returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the expression once (failures propagate), then keep
        matching until an attempt fails; return the final location and the
        accumulated tokens."""
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        not_ender = self.not_ender
        reject_sentinel = not_ender.try_parse if not_ender is not None else None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)

        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # a failed repetition (or sentinel hit) simply ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Optionally warn when a contained expression already has a results
        name, then defer to the base implementation."""
        diag_flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag_flag not in self.suppress_warnings_
        ):
            # first contained expression that has a results name and has not
            # suppressed this diagnostic
            colliding = next(
                (
                    e
                    for e in [self.expr] + self.expr.recurse()
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and diag_flag not in e.suppress_warnings_
                ),
                None,
            )
            if colliding is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {colliding.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        """Default name: the repeated expression in braces, followed by ``...``."""
        repeated = f"{self.expr}"
        return "{" + repeated + "}..."
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Run the one-or-more logic; if even the first match fails, return
        ``loc`` unchanged with empty results instead of raising."""
        try:
            outcome = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        """Default name: the repeated expression in brackets, followed by ``...``."""
        repeated = f"{self.expr}"
        return "[" + repeated + "]..."
class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        ``min`` and ``max`` bound the number of list elements; ``min``
        must be at least 1 (and is treated as 1 when omitted), and ``max``
        must not be less than ``min``. A ``ValueError`` is raised when
        either constraint is violated.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        # delimiters are kept in the output only when combining
        self.delim = delim if combine else Suppress(delim)
        self.combine = combine
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one leading element, then (min-1 .. max-1) repetitions of
        # delimiter + element
        fewest_extra = self.min - 1
        most_extra = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            fewest_extra,
            most_extra,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Default name of the form ``elem [delim elem]...``."""
        element = self.content.streamline()
        return f"{element} [{self.raw_delim} {element}]..."
class _NullToken:
    """Placeholder object that is always falsy and renders as an empty string."""

    def __bool__(self) -> bool:
        """Always ``False``."""
        return False

    def __str__(self) -> str:
        """Always the empty string."""
        return ""
class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that is matched zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel for "no default supplied"; compared by identity in parseImpl
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try the wrapped expression at ``loc``; if it fails, return ``loc``
        unchanged with either the default value or no tokens."""
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(
                instring, loc, do_actions, callPreParse=False
            )
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                # no default configured: contribute nothing
                return loc, []  # type: ignore[return-value]
            if not wrapped.resultsName:
                return loc, [fallback]  # type: ignore[return-value]
            # expose the default under the wrapped expression's results name
            tokens = ParseResults([fallback])
            tokens[wrapped.resultsName] = fallback
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression in brackets, with any
        enclosing ``{}`` pairs removed first."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return "[" + inner + "]"
# ``Optional`` is bound to the same class object as ``Opt``. This module-level
# name is distinct from ``typing.Optional``, which this file references by its
# qualified name.
Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        # ``failOn`` is the camelCase keyword form of ``fail_on``
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # private helper expression that only carries the ignore expressions
        # to be stepped over while scanning
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's current
        ignore expressions plus the ``ignore`` constructor argument."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register ``expr`` as ignorable and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained, in the same way as
        :class:`Combine`'s ``ignore``.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Scan forward from ``loc`` until the target expression matches.

        Returns the location of the target (or past it when ``include`` was
        set) and the skipped text as results; raises :class:`ParseException`
        when the end of the input is passed without a match.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        # use the same lookahead API name as NotAny.parseImpl
        fail_on_matches = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if fail_on_matches is not None:
                # break if failOn expression matches
                # NOTE(review): this ``break`` bypasses the ``else`` clause of
                # the ``while`` below, so control continues to "build up return
                # values" rather than raising here - confirm this is intended.
                if fail_on_matches(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: parse actions are not run while scanning
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real so its tokens (and actions) are included
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # remember where this Forward was created; __del__ reports this
        # file/line if no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' call; compared in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and copy its parsing
        attributes onto this ``Forward``. Strings are converted with
        ``_literalStringClass``; any other non-``ParserElement`` operand
        yields ``NotImplemented``. Returns ``self``."""
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # record the caller's stack entry so __or__ can detect 'fwd << a | b'
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """In-place ``<<=``: delegates to ``<<`` for ``ParserElement``
        operands and returns ``NotImplemented`` for anything else."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """Build the ``|`` alternative, warning first (when the diagnostic is
        enabled and not suppressed) if the caller's stack entry equals the one
        recorded by the last ``<<``."""
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # attribute the warning to the location captured in __init__
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression at ``loc``.

        Warns (when the diagnostic is enabled and not suppressed) if no
        expression has been attached. When
        ``ParserElement._left_recursion_enabled`` is false this simply
        delegates to the base class; otherwise the memo-based bounded
        recursion algorithm described below is used.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                # none of the entry points found in the captured frames
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                # memo keys are (location, element, do_actions)
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                # seed the memo with a failure located just before ``loc`` so
                # that any real match compares as an improvement
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match – do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            # record the failure under both keys before re-raising
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Clear ``skipWhitespace`` on this element only; the ``recursive``
        argument is not used here. Returns ``self``."""
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Set ``skipWhitespace`` on this element only; the ``recursive``
        argument is not used here. Returns ``self``."""
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the contained expression once. Returns ``self``."""
        if not self.streamlined:
            # set the flag before descending, so a nested streamline() call on
            # this same object returns immediately
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: always emits a ``DeprecationWarning``, then validates
        the contained expression (unless this element is already in
        ``validateTrace``) and calls ``_checkRecursion``."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Default name: the class name followed by the (truncated) string
        form of the contained expression, or ``None`` when there is none."""
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        retString = "..."
        try:
            if self.expr is not None:
                retString = str(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            # NOTE: returning from ``finally`` discards any exception raised in
            # the ``try`` body; in that case ``retString`` is still "..."
            return f"{type(self).__name__}: {retString}"

    def copy(self) -> ParserElement:
        """Copy this element. With an expression attached, defer to the base
        class; otherwise return a new ``Forward`` that contains this one."""
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Optionally warn when a results name is set while no expression is
        attached, then defer to the base implementation."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class TokenConverter(ParseElementEnhance):
    """
    Abstract base for :class:`ParseElementEnhance` subclasses that convert
    parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but not forwarded; saveAsList is reset below
        super().__init__(expr)
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the camelCase keyword, when given, takes precedence
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but
        # re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register an ignorable expression and return ``self``. In
        ``adjacent`` mode ``ParserElement.ignore`` is called directly,
        bypassing the intermediate classes' overrides."""
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single concatenated string."""
        # start from a copy so that everything except the list items carries over
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        needs_wrapping = self.resultsName and combined.haskeys()
        if needs_wrapping:
            return [combined]
        return combined
class Group(TokenConverter):
    """Converter that returns the matched tokens wrapped in a list - handy
    for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    When the optional ``aslist`` argument is True, the tokens are returned
    as a Python list rather than as a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper."""
        if not self._asPythonList:
            return [tokenlist]

        # aslist=True: hand back the tokens as a ParseResults.List
        if isinstance(tokenlist, ParseResults):
            plain_tokens = tokenlist.asList()
        else:
            plain_tokens = list(tokenlist)
        return ParseResults.List(plain_tokens)
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and, at the
    same time, as a dictionary keyed by the first token of each element.
    Useful for tabular report scraping when the first column can serve as
    the item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict rather than as a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty element of *tokenlist* under its first token."""
        for position, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            # the first token is the key; integer keys are stored as strings
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if len(entry) == 1:
                # key with nothing after it -> empty-string value
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                # key followed by one plain token -> that token
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                # drop the key, leaving only the value tokens
                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` is a placeholder; it is resolved into a SkipTo
        # once the following expression is known (see __add__ / __sub__)
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        # pending skip: suppress everything up to *other*, then match it
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        # same resolution as __add__, using the '-' operator
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressing, so return ``self`` unchanged."""
        return self
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator that traces calls to a parse action on ``sys.stderr``.

    Each call to the parse action first writes
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
    On completion it writes ``"<<"`` followed by the returned value, or by
    the exception the parse action raised (the exception is re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def traced(*call_args):
        label = f.__name__
        # the final three positional arguments are (string, location, tokens)
        instring, loc, toks = call_args[-3:]
        if len(call_args) > 3:
            # an extra leading argument: prefix the label with its type name
            label = f"{type(call_args[0]).__name__}.{label}"
        sys.stderr.write(
            f">>entering {label}(line: {line(loc, instring)!r}, {loc}, {toks!r})\n"
        )
        try:
            result = f(*call_args)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {result!r})\n")
            return result

    traced.__name__ = f.__name__
    return traced
# Convenience constants for positional expressions: one shared, named
# instance of each.
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")
# Grammar used by srange() to parse a regex-style ``[...]`` character set.

# backslash-escaped punctuation -> the punctuation character itself
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# ``\x##`` (or legacy ``\0x##``) hex escape -> the character with that code.
# The digits are everything after the first "x"/"X".  str.lstrip(r"\0x")
# must not be used here: it strips a *set* of characters, so it also eats
# leading "0" digits (``\x00`` became "" and int() raised ValueError) and
# it never removes an uppercase "X" that the regex accepts.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# ``\0##`` octal escape -> the character with that code
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one member of the set: an escape, or any single char except "\" and "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range; grouped so srange() can tell ranges from single chars
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# complete bracket expression: "[", optional "^", one or more members, "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
def srange(s: str) -> str:
    r"""Helper for spelling out character sets for :class:`Word`
    construction, using the regexp ``'[]'`` range syntax::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s; the return value is the
    expanded character set joined into a single string. The values
    enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    Any error raised while parsing or expanding ``s`` is swallowed and an
    empty string is returned instead.
    """

    def expand(part):
        # a ParseResults part is a (first, last) range; anything else is
        # already a literal piece of the result
        if isinstance(part, ParseResults):
            first, last = ord(part[0]), ord(part[1])
            return "".join(map(chr, range(first, last + 1)))
        return part

    try:
        members = _reBracketExpr.parse_string(s).body
        return "".join(map(expand, members))
    except Exception:
        return ""
def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying a function to every
    element of a :class:`ParseResults` list. Any additional args are
    forwarded to the function after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def mapper(s, l, t):
        return [func(token, *args) for token in t]

    # name the action after the mapped callable, falling back to the name
    # of its class when the callable has no __name__ of its own
    class_name = getattr(func, "__class__").__name__
    mapper.__name__ = getattr(func, "__name__", class_name)

    return mapper
def autoname_elements() -> None:
    """
    Utility for mass-naming parser elements, for generating railroad
    diagrams with named subdiagrams.

    Walks the local variables of the calling frame and, for every
    :class:`ParserElement` without a custom name, calls ``set_name`` with
    the name of the variable it is bound to.
    """
    caller = sys._getframe(1)
    if caller is None:
        return
    caller = typing.cast(types.FrameType, caller)
    for var_name, candidate in caller.f_locals.items():
        # skip non-parser objects and elements that already carry a name
        if not isinstance(candidate, ParserElement) or candidate.customName:
            continue
        candidate.set_name(var_name)
# Ready-made expressions for quoted string literals.  Each one is a Combine
# of a Regex for the opening quote plus body, followed by the closing quote.

# "..." - the body regex accepts a doubled quote ("") and backslash escapes
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# '...' - single-quote counterpart of dbl_quoted_string
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either quoting style; the double-quoted form is tried first
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style literals: triple-quoted forms plus the single-line forms,
# which here accept a backslash-escaped quote rather than a doubled one
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted_string carrying a leading "u" prefix
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")
# character sets built with srange() from legacy ``\0x##`` hex escapes
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Snapshot of every ParserElement bound at module level up to this point,
# kept for future reference if a global default value gets updated.
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# Compatibility synonyms: camelCase names kept for the snake_case
# definitions.  Expressions are aliased directly; functions are wrapped
# with replaced_by_pep8.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on