Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 44%
2602 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
1#
2# core.py
3#
5from collections import deque
6import os
7import typing
8from typing import (
9 Any,
10 Callable,
11 Generator,
12 List,
13 NamedTuple,
14 Sequence,
15 Set,
16 TextIO,
17 Tuple,
18 Union,
19 cast,
20)
21from abc import ABC, abstractmethod
22from enum import Enum
23import string
24import copy
25import warnings
26import re
27import sys
28from collections.abc import Iterable
29import traceback
30import types
31from operator import itemgetter
32from functools import wraps
33from threading import RLock
34from pathlib import Path
36from .util import (
37 _FifoCache,
38 _UnboundedCache,
39 __config_flags,
40 _collapse_string_to_ranges,
41 _escape_regex_range_chars,
42 _bslash,
43 _flatten,
44 LRUMemo as _LRUMemo,
45 UnboundedMemo as _UnboundedMemo,
46 replaced_by_pep8,
47)
48from .exceptions import *
49from .actions import *
50from .results import ParseResults, _ParseResultsWithOffset
51from .unicode import pyparsing_unicode
# Upper bound used for "unlimited" integer values; taken from sys.maxsize.
_MAX_INT = sys.maxsize
# Tuple of the built-in text and bytes types (usable directly with isinstance).
str_type: Tuple[type, ...] = (str, bytes)
56#
57# Copyright (c) 2003-2022 Paul T. McGuire
58#
59# Permission is hereby granted, free of charge, to any person obtaining
60# a copy of this software and associated documentation files (the
61# "Software"), to deal in the Software without restriction, including
62# without limitation the rights to use, copy, modify, merge, publish,
63# distribute, sublicense, and/or sell copies of the Software, and to
64# permit persons to whom the Software is furnished to do so, subject to
65# the following conditions:
66#
67# The above copyright notice and this permission notice shall be
68# included in all copies or substantial portions of the Software.
69#
70# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
71# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
72# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
73# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
74# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
75# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
76# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
77#
if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """Minimal fallback for :func:`functools.cached_property` on Python < 3.8.

        The wrapped function is evaluated on first attribute access and its
        result is stored in the instance ``__dict__`` under the function's
        name, so later lookups find the stored value directly.
        """

        def __init__(self, func):
            self._func = func

        def __get__(self, instance, owner=None):
            # Accessed on the class rather than an instance: there is nothing
            # to compute or cache, so hand back the descriptor itself (this is
            # what functools.cached_property does too).
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret
class __compat__(__config_flags):
    """
    Cross-version compatibility switches for pyparsing features that are
    scheduled for a future release. Setting a value here to True enables the
    corresponding feature in earlier versions, for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public name defined in the class body so far
    _all_names = [name for name in locals() if not name.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]
class __diag__(__config_flags):
    # Module-wide diagnostic switches; every flag starts out disabled.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # public flag names, then the two families selected by name prefix
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        # switch on each "warn*" flag in turn
        for warning_name in cls._warning_names:
            cls.enable(warning_name)
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # Member names mirror the flag attributes of the __diag__ class; only the
    # member *name* is passed on by enable_diag()/disable_diag().
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    # NOTE(review): this member is not described in the docstring above; by its
    # name it presumably concerns '<<' used together with MatchFirst - confirm.
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag.

    ``diag_enum`` is a :class:`Diagnostics` member; its name selects the flag.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag.

    ``diag_enum`` is a :class:`Diagnostics` member; its name selects the flag.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing warning diagnostic (see :class:`Diagnostics`).

    Delegates to the classmethod of the same name on ``__diag__``.
    """
    diag_flags = __diag__
    diag_flags.enable_all_warnings()
# hide abstract class: the imported base is only needed while the two
# configuration classes above are being defined, so drop the module-level name
del __config_flags
197def _should_enable_warnings(
198 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
199) -> bool:
200 enable = bool(warn_env_var)
201 for warn_opt in cmd_line_warn_options:
202 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split(
203 ":"
204 )[:5]
205 if not w_action.lower().startswith("i") and (
206 not (w_message or w_category or w_module) or w_module == "pyparsing"
207 ):
208 enable = True
209 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""):
210 enable = False
211 return enable
# At import time, enable every warning diagnostic when the interpreter's
# warning options or the PYPARSINGENABLEALLWARNINGS environment variable say so.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()


# set of single-arg builtins that can be used as parse actions; _trim_arity
# wraps these so they are called with the tokens only
_single_arg_builtins = {
    sum,
    len,
    sorted,
    reversed,
    list,
    tuple,
    set,
    any,
    all,
    min,
    max,
}

_generatorType = types.GeneratorType
# (new location, tokens) pair
ParseImplReturnType = Tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]
# A parse action takes 0 to 3 arguments: (), (toks), (loc, toks) or (s, loc, toks).
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# Same call shapes as ParseAction, but returning a bool.
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: (s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug callbacks; the trailing bool is the cache_hit flag
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# Character-set constants (plain strings of characters).
alphas = string.ascii_uppercase + string.ascii_lowercase
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# every character of string.printable that is not whitespace
printables = "".join([c for c in string.printable if c not in string.whitespace])

# Frame summary captured by the first call to _trim_arity and reused afterwards.
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that calls ``func`` with ``args[limit:]``, where ``limit``
    starts at 0 and is raised by one (up to ``max_limit``) each time the call
    itself fails with a ``TypeError``. Once a call has succeeded the discovered
    ``limit`` is kept and later ``TypeError`` exceptions are re-raised untouched.

    Members of ``_single_arg_builtins`` are special-cased: they are wrapped so
    that only the third argument (the tokens) is passed.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # The TypeError is attributed to the arity probe only when the
                    # innermost of the first two traceback frames is the
                    # func(...) call line above, i.e. (filename, lineno) equals
                    # the synthesized pair.
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            # drop one more leading argument and try again
                            limit += 1
                            continue

                    raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate returning ``True``/``False`` so it can be used wherever a
    parse action is expected, for places where
    :class:`ParserElement.add_condition` cannot be used (such as an operator
    level in :class:`infix_notation`).

    The returned action raises when the predicate yields a falsy value, and
    returns ``None`` otherwise.

    Optional keyword arguments:

    - ``message`` - custom text for the raised exception
      (default: ``"failed user-defined condition"``)
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise raise :class:`ParseException`
    """
    if message is None:
        failure_msg = "failed user-defined condition"
    else:
        failure_msg = message

    failure_type = ParseException
    if fatal:
        failure_type = ParseFatalException

    # normalize the predicate to the full (s, l, t) call shape
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if bool(fn(s, l, t)):
            return None
        raise failure_type(s, l, failure_msg)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Default "about to try a match" debug action: prints the expression, the
    location (with line and column), the text line being parsed and a caret
    under the current column. Output is prefixed with ``*`` on a cache hit.
    """
    prefix = "*" if cache_hit else ""
    row = lineno(loc, instring)
    column = col(loc, instring)
    text_line = line(loc, instring)
    report = (
        f"{prefix}Match {expr} at loc {loc}({row},{column})\n"
        f" {text_line}\n"
        f" {' ' * (column - 1)}^"
    )
    print(report)
367def _default_success_debug_action(
368 instring: str,
369 startloc: int,
370 endloc: int,
371 expr: "ParserElement",
372 toks: ParseResults,
373 cache_hit: bool = False,
374):
375 cache_hit_str = "*" if cache_hit else ""
376 print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}")
379def _default_exception_debug_action(
380 instring: str,
381 loc: int,
382 expr: "ParserElement",
383 exc: Exception,
384 cache_hit: bool = False,
385):
386 cache_hit_str = "*" if cache_hit else ""
387 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}")
def null_debug_action(*args):
    """No-op debug action: accepts any positional arguments and does nothing,
    for silencing debugging output during parsing."""
    return None
394class ParserElement(ABC):
395 """Abstract base level parser element class."""
397 DEFAULT_WHITE_CHARS: str = " \n\t\r"
398 verbose_stacktrace: bool = False
399 _literalStringClass: type = None # type: ignore[assignment]
@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of whitespace characters.

    The new value is stored on ``ParserElement.DEFAULT_WHITE_CHARS`` and is
    also applied to each expression in ``_builtin_exprs`` that still follows
    the default (``copyDefaultWhiteChars`` is true).

    Example::

        # default whitespace chars are space, <TAB> and newline
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # refresh the whitespace of the expressions predefined in this module;
    # each one gets its own fresh set
    followers = (e for e in _builtin_exprs if e.copyDefaultWhiteChars)
    for builtin_expr in followers:
        builtin_expr.whiteChars = set(chars)
@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class used to wrap plain string literals that are mixed into
    a parser expression. The class is stored on
    ``ParserElement._literalStringClass``.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class
@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Generate ``cls(obj, **class_kwargs)`` for every ``obj`` in ``seq``, lazily.

    Example::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    """
    for obj in seq:
        yield cls(obj, **class_kwargs)
class DebugActions(NamedTuple):
    # Triple of optional debug callbacks held by each ParserElement;
    # a slot set to None means "no callback".
    # called before a match is attempted
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match
    debug_match: typing.Optional[DebugSuccessAction]
    # called when matching (or a parse action) raises
    debug_fail: typing.Optional[DebugExceptionAction]
def __init__(self, savelist: bool = False):
    """Set every per-expression attribute to its default.

    ``savelist`` becomes ``saveAsList``; whitespace characters start as a
    fresh set built from ``ParserElement.DEFAULT_WHITE_CHARS``.
    """
    # actions run on success / failure
    self.parseAction: List[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    # naming
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    self.saveAsList = savelist
    # whitespace handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs: List["ParserElement"] = []
    self.debug = False
    self.streamlined = False
    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""
    # mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions (none installed yet)
    self.debugActions = self.DebugActions(None, None, None)
    # avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
    # diagnostics silenced for this expression
    self.suppress_warnings_: List[Diagnostics] = []
def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
    """
    Record ``warning_type`` as suppressed for this expression and return
    the expression itself, so calls can be chained.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parse_string("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self
def visit_all(self):
    """Generator over this expression and all of its sub-expressions,
    breadth-first, each yielded once. Typically just for internal use.
    """
    pending = deque([self])
    visited = set()
    while pending:
        node = pending.popleft()

        # recursive grammars loop back on themselves; only handle new nodes
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node
def copy(self) -> "ParserElement":
    """
    Return a shallow copy of this :class:`ParserElement` with its own
    ``parseAction`` and ``ignoreExprs`` lists, so that different parse
    actions can be attached to the same parsing pattern. If the element
    follows the default whitespace (``copyDefaultWhiteChars``), the copy's
    ``whiteChars`` is rebuilt from the current default.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate
def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> "ParserElement":
    """
    Give the matched tokens a name, so they can be referenced as a nested
    attribute of the returned parse results.

    Results names normally behave like dict keys: a later value replaces
    an earlier one. To keep every value captured under a name, call
    ``set_results_name`` with ``list_all_matches`` = True.

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
    this lets a basic element, such as an integer, be defined once and
    referenced in several places under different names.

    The abbreviated syntax ``expr("name")`` can be used in place of
    ``expr.set_results_name("name")`` - see :class:`__call__`. If
    ``list_all_matches`` is required, use ``expr("name*")``.

    Example::

        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # either spelling of the flag may be used
    keep_all = listAllMatches or list_all_matches
    return self._setResultsName(name, keep_all)
584 def _setResultsName(self, name, listAllMatches=False):
585 if name is None:
586 return self
587 newself = self.copy()
588 if name.endswith("*"):
589 name = name[:-1]
590 listAllMatches = True
591 newself.resultsName = name
592 newself.modalResults = not listAllMatches
593 return newself
def set_break(self, break_flag: bool = True) -> "ParserElement":
    """
    Method to invoke the Python pdb debugger when this element is
    about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
    disable.

    Enabling replaces ``self._parse`` with a wrapper that calls
    ``pdb.set_trace()`` and then the original parse method; the original is
    kept on the wrapper as ``_originalParseMethod`` so that disabling can
    restore it. Enabling an element that is already wrapped is a no-op, so
    wrappers never nest and a single disable always restores the original.

    Returns ``self``.
    """
    if break_flag:
        # already breaking: don't stack a second wrapper on top of the first
        if hasattr(self._parse, "_originalParseMethod"):
            return self

        _parseMethod = self._parse

        def breaker(instring, loc, doActions=True, callPreParse=True):
            import pdb

            # this call to pdb.set_trace() is intentional, not a checkin error
            pdb.set_trace()
            return _parseMethod(instring, loc, doActions, callPreParse)

        breaker._originalParseMethod = _parseMethod  # type: ignore [attr-defined]
        self._parse = breaker  # type: ignore [assignment]
    elif hasattr(self._parse, "_originalParseMethod"):
        self._parse = self._parse._originalParseMethod  # type: ignore [attr-defined, assignment]
    return self
618 def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
619 """
620 Define one or more actions to perform when successfully matching parse element definition.
622 Parse actions can be called to perform data conversions, do extra validation,
623 update external data structures, or enhance or replace the parsed tokens.
624 Each parse action ``fn`` is a callable method with 0-3 arguments, called as
625 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
627 - ``s`` = the original string being parsed (see note below)
628 - ``loc`` = the location of the matching substring
629 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object
631 The parsed tokens are passed to the parse action as ParseResults. They can be
632 modified in place using list-style append, extend, and pop operations to update
633 the parsed list elements; and with dictionary-style item set and del operations
634 to add, update, or remove any named results. If the tokens are modified in place,
635 it is not necessary to return them with a return statement.
637 Parse actions can also completely replace the given tokens, with another ``ParseResults``
638 object, or with some entirely different object (common for parse actions that perform data
639 conversions). A convenient way to build a new parse result is to define the values
640 using a dict, and then create the return value using :class:`ParseResults.from_dict`.
642 If None is passed as the ``fn`` parse action, all previously added parse actions for this
643 expression are cleared.
645 Optional keyword arguments:
647 - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during
648 lookaheads and alternate testing. For parse actions that have side effects, it is
649 important to only call the parse action once it is determined that it is being
650 called as part of a successful parse. For parse actions that perform additional
651 validation, then call_during_try should be passed as True, so that the validation
652 code is included in the preliminary "try" parses.
654 Note: the default parsing behavior is to expand tabs in the input string
655 before starting the parsing process. See :class:`parse_string` for more
656 information on parsing strings containing ``<TAB>`` s, and suggested
657 methods to maintain a consistent view of the parsed string, the parse
658 location, and line and column positions within the parsed string.
660 Example::
662 # parse dates in the form YYYY/MM/DD
664 # use parse action to convert toks from str to int at parse time
665 def convert_to_int(toks):
666 return int(toks[0])
668 # use a parse action to verify that the date is a valid date
669 def is_valid_date(instring, loc, toks):
670 from datetime import date
671 year, month, day = toks[::2]
672 try:
673 date(year, month, day)
674 except ValueError:
675 raise ParseException(instring, loc, "invalid date given")
677 integer = Word(nums)
678 date_str = integer + '/' + integer + '/' + integer
680 # add parse actions
681 integer.set_parse_action(convert_to_int)
682 date_str.set_parse_action(is_valid_date)
684 # note that integer fields are now ints, not strings
685 date_str.run_tests('''
686 # successful parse - note that integer fields were converted to ints
687 1999/12/31
689 # fail - invalid date
690 1999/13/31
691 ''')
692 """
693 if list(fns) == [None]:
694 self.parseAction = []
695 else:
696 if not all(callable(fn) for fn in fns):
697 raise TypeError("parse actions must be callable")
698 self.parseAction = [_trim_arity(fn) for fn in fns]
699 self.callDuringTry = kwargs.get(
700 "call_during_try", kwargs.get("callDuringTry", False)
701 )
702 return self
def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Append one or more parse actions to this expression's existing list.
    See :class:`set_parse_action` for the accepted call signatures.

    ``call_during_try`` (or the legacy ``callDuringTry``) can only switch
    the flag on here; an already-enabled flag stays enabled.

    See examples in :class:`copy`.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction.extend(wrapped)
    requested = kwargs.get("call_during_try", kwargs.get("callDuringTry", False))
    self.callDuringTry = self.callDuringTry or requested
    return self
def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception;
      when omitted (or None), the default message of
      :func:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Returns ``self``.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    # Only stringify a message that was actually supplied: str(None) would
    # hand the literal text "None" to condition_as_parse_action and so
    # override its default failure message.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self
def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
    """
    Install the action to run when parsing fails at this expression.
    The fail action is a callable taking the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns ``self``, so calls can be chained."""
    fail_action = fn
    self.failAction = fail_action
    return self
769 def _skipIgnorables(self, instring: str, loc: int) -> int:
770 if not self.ignoreExprs:
771 return loc
772 exprsFound = True
773 ignore_expr_fns = [e._parse for e in self.ignoreExprs]
774 last_loc = loc
775 while exprsFound:
776 exprsFound = False
777 for ignore_fn in ignore_expr_fns:
778 try:
779 while 1:
780 loc, dummy = ignore_fn(instring, loc)
781 exprsFound = True
782 except ParseException:
783 pass
784 # check if all ignore exprs matched but didn't actually advance the parse location
785 if loc == last_loc:
786 break
787 last_loc = loc
788 return loc
def preParse(self, instring: str, loc: int) -> int:
    """Return the location at which matching should start: ``loc`` moved
    past any ignorable expressions and then, when ``skipWhitespace`` is
    set, past any characters found in ``whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
def parseImpl(self, instring, loc, doActions=True):
    """Base matching step: consumes nothing and yields no tokens, returning
    the unchanged location together with a new empty list."""
    no_tokens = []
    return loc, no_tokens
def postParse(self, instring, loc, tokenlist):
    """Base post-processing step: hands back ``tokenlist`` exactly as it was
    received."""
    unchanged = tokenlist
    return unchanged
# @profile
def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """Match this expression at ``loc`` without consulting any cache.

    Steps: optional ``preParse`` (skipping ignorables/whitespace),
    ``parseImpl``, ``postParse``, wrapping the tokens in a
    :class:`ParseResults`, then running the parse actions when ``doActions``
    or ``callDuringTry`` is set. The debug and fail callbacks are only
    consulted on the slower path taken when ``self.debug`` or
    ``self.failAction`` is set.

    Returns ``(new_loc, results)``. An ``IndexError`` from ``parseImpl`` or
    from a parse action is converted to :class:`ParseException`; other
    exceptions propagate after the debug/fail callbacks have been notified.
    """
    debugging = self.debug  # and doActions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        # Give the except-handler below a defined location even when
        # preParse itself raises; otherwise the handler would fail with
        # UnboundLocalError and hide the real exception.
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # fast path: same steps with no callbacks to notify
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        # NOTE(review): the text is passed as the first
                        # positional argument, which other call sites in this
                        # file use for the input string - confirm intent.
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action returning something new replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens
def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """Attempt a match at ``loc`` and return the location after the match.

    A :class:`ParseFatalException` is re-raised as is when ``raise_fatal``
    is true; otherwise it is replaced by a plain :class:`ParseException`
    carrying this expression's ``errmsg``.
    """
    try:
        outcome = self._parse(instring, loc, doActions=do_actions)
        return outcome[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """Report whether this expression matches at ``loc``: ``False`` when the
    trial parse raises :class:`ParseException` or ``IndexError``, ``True``
    when it completes."""
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True
# cache for left-recursion in Forward references
# (class-level, so shared by every ParserElement)
recursion_lock = RLock()
# key: (int, Forward, bool) -> value: (int, ParseResults or Exception)
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
] = {}
940 class _CacheType(dict):
941 """
942 class to help type checking
943 """
945 not_in_cache: bool
947 def get(self, *args):
948 ...
950 def set(self, *args):
951 ...
# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = (
    _CacheType()
)  # set later by enable_packrat(); this is here so that reset_cache() doesn't fail
# guards every lookup/update of packrat_cache in _parseCache
packrat_cache_lock = RLock()
# counters indexed as [hits, misses]
packrat_cache_stats = [0, 0]
960 # this method gets repeatedly called during backtracking with the same arguments -
961 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Memoizing front end for ``_parseNoCache``.

    The cache key is ``(self, instring, loc, callPreParse, doActions)``. On a
    miss the real parse is run and either its result (with a copy of the
    tokens and the starting ``loc``) or a traceback-free copy of the raised
    ``ParseBaseException`` is stored. On a hit the stored exception is
    re-raised, or a fresh copy of the stored tokens is returned. The whole
    sequence runs while holding ``ParserElement.packrat_cache_lock``.

    Debug actions are invoked on cache hits with ``cache_hit=True``; a
    ``TypeError`` from a debug action (e.g. one that does not accept that
    keyword) is ignored.
    """
    HIT, MISS = 0, 1
    cache_key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        memo = ParserElement.packrat_cache
        cached = memo.get(cache_key)

        if cached is memo.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                fresh = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                memo.set(cache_key, pe.__class__(*pe.args))
                raise
            memo.set(cache_key, (fresh[0], fresh[1].copy(), loc))
            return fresh

        ParserElement.packrat_cache_stats[HIT] += 1
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(cached, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, cached, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise cached

        cached = cast(Tuple[int, ParseResults, int], cached)
        # hand out a copy so callers cannot mutate the cached tokens
        loc_, result, endloc = cached[0], cached[1].copy(), cached[2]
        if self.debug and self.debugActions.debug_match:
            try:
                self.debugActions.debug_match(
                    instring, loc_, endloc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return loc_, result
# default parse entry point is the non-memoizing implementation;
# enable_packrat() rebinds it to _parseCache and disable_memoization() restores it
_parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty all class-level memoization state.

    Clears the packrat cache, zeroes every packrat statistics counter in
    place, and clears the left-recursion memo table.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so the existing list object is kept and only its contents reset
    stats[:] = [0 for _ in stats]
    ParserElement.recursion_memos.clear()
# memoization mode flags; both start disabled
_packratEnabled = False  # set True by enable_packrat()
_left_recursion_enabled = False  # set True by enable_left_recursion()
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing and discard their memoized data.

    Calling this when neither mode is active is harmless, so it can be used
    before enabling Packrat or Left Recursion to clear any previous settings.
    """
    ParserElement.reset_cache()
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False
    # go back to the non-memoizing parse entry point
    ParserElement._parse = ParserElement._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports direct and indirect
    left-recursion. While parsing, left-recursive :class:`Forward` elements are
    matched repeatedly with a fixed recursion depth that is increased step by
    step until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    Because matches of ``Forward`` elements are memoized, parse actions may not
    be re-run during backtracking. Programs whose parse actions depend on a
    strict ordering of side effects may therefore break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None``, memoize all of them.
      Values that are not greater than zero raise ``NotImplementedError``.
    - ``force`` - (default=``False``) - first disable any memoization that is
      already active instead of raising on a conflict.

    Bounded Recursion parsing is similar but not identical to Packrat parsing,
    so the two cannot be combined: without ``force=True`` a ``RuntimeError`` is
    raised if Packrat is already enabled.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError("Memo size of %s" % cache_size_limit)

    ParserElement.recursion_memos = memo  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which memoizes parse attempts. A repeated
    attempt at the same string location (common in complex grammars) returns
    the cached value instead of re-running the parsing code. Both successful
    results and parsing exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the size of
      the packrat cache; ``None`` makes the cache unbounded; ``0`` effectively
      disables it.
    - ``force`` - (default= ``False``) - first disable any memoization that is
      already active instead of raising on a conflict.

    Memoizing may break programs whose parse actions have side effects, which
    is why packrat parsing is off when pyparsing is first imported. For best
    results call ``enable_packrat()`` immediately after importing pyparsing.
    If packrat is already enabled, the existing cache is left as it is.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar but not identical to Bounded Recursion parsing,
    so the two cannot be combined: without ``force=True`` a ``RuntimeError`` is
    raised if Bounded Recursion is already enabled.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if ParserElement._packratEnabled:
        # already active: keep the current cache and entry point
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        ParserElement.packrat_cache = _UnboundedCache()
    else:
        ParserElement.packrat_cache = _FifoCache(cache_size_limit)  # type: ignore[assignment]
    ParserElement._parse = ParserElement._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` from its beginning using this expression. This is the
    primary entry point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    Setting ``parse_all`` has the same effect as ending the grammar with
    :class:`StringEnd`.

    So that column numbers are reported correctly, parsing is done on a copy of
    the input in which tabs have been expanded with ``str.expandtabs`` (8
    columns per tab by default), unless tabs are being kept. If the input
    contains tabs and a parse action uses ``loc`` to index into the string,
    keep the two views consistent by doing one of the following:

    - call ``parse_with_tabs`` on the grammar before calling ``parse_string``,
    - give the parse action the full ``(s,loc,toks)`` signature and index into
      its ``s`` argument, or
    - expand the tabs in the input yourself before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    Matching behavior depends on the concrete expression class; see the
    subclasses for more examples.

    With ``parse_all`` set, trailing unmatched input is an error.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_consume_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if must_consume_all:
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, end_loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    return tokens
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match yields a tuple of
    the matching tokens, the start location, and the end location.

    :param instring: the string to scan
    :param max_matches: stop scanning after this many matches have been found
    :param overlap: if set, overlapping matches are reported
    :param debug: if set, print a dict with tokens, start and end for every match
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of
        the two values is used

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every loop iteration
    preparseFn = self.preParse
    parseFn = self._parse
    # use the PEP8 name, as parse_string() does, rather than the legacy synonym
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character past the pre-parsed position
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # position after skipping ignorables from the current loc
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc; step forward to avoid looping forever
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites the matched portions of a
    string. Define a grammar, attach a parse action that modifies the returned
    token list, and call ``transform_string()`` on the target string: every
    match is replaced by the tokens produced for it, unmatched text is copied
    through, and the resulting string is returned.

    Note that this sets ``keepTabs`` to ``True`` on the expression.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    cursor = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # text between the previous match and this one passes through unchanged
            pieces.append(instring[cursor:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.as_list())
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            cursor = end
        pieces.append(instring[cursor:])
        kept = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(kept))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens of
    every match into a single :class:`ParseResults`. The optional
    ``max_matches`` argument stops the search after 'n' matches are found
    (``maxMatches`` is the pre-PEP8 spelling; the smaller value is used).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            toks for toks, _start, _end in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, using this expression as the separator.

    :param instring: the string to split
    :param maxsplit: limit on the number of splits
    :param include_separators: (default= ``False``) also yield the first token
        of each separator match between the pieces
    :param includeSeparators: pre-PEP8 spelling of ``include_separators``

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    with_separators = includeSeparators or include_separators
    resume_at = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[resume_at:start]
        if with_separators:
            yield toks[0]
        resume_at = end
    # whatever follows the final separator
    yield instring[resume_at:]
def __add__(self, other) -> "ParserElement":
    """
    Implementation of the ``+`` operator - returns :class:`And`. A string
    operand is converted with this expression's ``_literalStringClass``
    (a :class:`Literal` by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    The skipped text is returned under the results name '_skipped'. So that a
    parser may contain several skips, the value is a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    # let Python try the reflected operation on the other operand
    return NotImplemented
def __radd__(self, other) -> "ParserElement":
    """
    Implementation of the ``+`` operator when the left operand is not a
    :class:`ParserElement`. ``... + expr`` skips to ``expr`` and then matches it.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented
def __sub__(self, other) -> "ParserElement":
    """
    Implementation of the ``-`` operator: like ``+``, but with an
    ``And._ErrorStop`` marker placed between the two operands.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented
def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of the ``-`` operator when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented
def __mul__(self, other) -> "ParserElement":
    """
    Implementation of the ``*`` operator, so that ``expr * 3`` can be written
    in place of ``expr + expr + expr``. The multiplier may also be a 2-tuple,
    similar to ``{min, max}`` multipliers in regular expressions, and the
    tuple may contain ``None`` (or ``...``):

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if more than n
    exprs exist in the input stream; it does not enforce a maximum number of
    occurrences. If that is wanted, write ``expr*(None, n) + ~expr``.

    Raises ``ValueError`` for a negative count or when the second tuple value
    is smaller than the first; returns ``NotImplemented`` for unsupported
    multiplier types.
    """
    # rewrite the Ellipsis spellings into the equivalent tuple forms
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        required, optional = other, 0
    elif isinstance(other, tuple):
        bounds = tuple(None if item is Ellipsis else item for item in other)
        # pad with None so a 1-tuple means "no upper bound", then clip to 2
        low, high = (bounds + (None, None))[:2]
        if low is None:
            low = 0
        if not isinstance(low, int):
            return NotImplemented
        if high is None:
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        if not isinstance(high, int):
            return NotImplemented
        required, optional = low, high - low
    else:
        return NotImplemented

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == 0 and optional == 0:
        return And([])

    if not optional:
        return self if required == 1 else And([self] * required)

    if required == 0:
        head = None
    elif required == 1:
        head = self
    else:
        head = And([self] * required)

    # build Opt(self + Opt(self + ... Opt(self))) from the innermost level outward
    tail = Opt(self)
    for _ in range(optional - 1):
        tail = Opt(self + tail)

    return tail if head is None else head + tail
def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` when the multiplier is the left operand;
    delegates to ``__mul__`` with the same argument.
    """
    product = self.__mul__(other)
    return product
def __or__(self, other) -> "ParserElement":
    """
    Implementation of the ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a pending skip with ``must_skip=True``, and
    ``expr | ""`` returns ``Opt(expr)``. Any other string is converted with
    this expression's ``_literalStringClass``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        if other == "":
            # `expr | ""` is equivalent to `Opt(expr)`
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented
def __ror__(self, other) -> "ParserElement":
    """
    Implementation of the ``|`` operator when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented
def __xor__(self, other) -> "ParserElement":
    """
    Implementation of the ``^`` operator - returns :class:`Or`. A string
    operand is converted with this expression's ``_literalStringClass``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented
def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of the ``^`` operator when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented
def __and__(self, other) -> "ParserElement":
    """
    Implementation of the ``&`` operator - returns :class:`Each`. A string
    operand is converted with this expression's ``_literalStringClass``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented
def __rand__(self, other) -> "ParserElement":
    """
    Implementation of the ``&`` operator when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented
def __invert__(self) -> "ParserElement":
    """
    Implementation of the ``~`` operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated
# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence; with __iter__ set to None, iter(expr) raises TypeError
# instead of falling back to calling __getitem__ with 0, 1, 2, ...
__iter__ = None
def __getitem__(self, key):
    """
    ``[]`` indexing is a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` instances of ``expr`` exist in the input stream. If
    that is wanted, write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` if more than two index arguments are given.
    """
    has_stop_on = False
    stop_on = NoMatch()

    # pull a stop_on expression out of slice notation, if one was used
    if isinstance(key, slice):
        key, stop_on = key.start, key.stop
        if key is None:
            key = ...
        has_stop_on = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        trailing = key[1]
        key, stop_on = (key[0], trailing.start), trailing.stop
        has_stop_on = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop_on:
        repeated.stopOn(stop_on)

    return repeated
def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` ends with a ``'*'`` character, ``list_all_matches`` is passed
    as ``True`` instead.

    Calling with no ``name`` is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)
def suppress(self) -> "ParserElement":
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that
    its output is suppressed; handy for keeping punctuation out of the
    returned results.
    """
    suppressed = Suppress(self)
    return suppressed
def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any).
        This implementation only sets the flag on this element and does not read ``recursive``.
    :returns: this element
    """
    setattr(self, "skipWhitespace", True)
    return self
def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern. Normally only used internally by
    the pyparsing module, but may be needed in whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any).
        This implementation only clears the flag on this element and does not read ``recursive``.
    :returns: this element
    """
    setattr(self, "skipWhitespace", False)
    return self
def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Replace the whitespace characters skipped by this element.

    Whitespace skipping is switched on, ``whiteChars`` becomes a new set built
    from ``chars``, and ``copy_defaults`` is stored as ``copyDefaultWhiteChars``.

    :returns: this element
    """
    self.skipWhitespace = True
    replacement = set(chars)
    self.whiteChars = replacement
    self.copyDefaultWhiteChars = copy_defaults
    return self
def parse_with_tabs(self) -> "ParserElement":
    """
    Keep ``<TAB>`` characters in the input instead of the default behavior of
    expanding them to spaces before parsing. Must be called before
    ``parse_string`` when the grammar contains elements that match ``<TAB>``
    characters.

    :returns: this element
    """
    setattr(self, "keepTabs", True)
    return self
def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in :class:`Suppress`. A :class:`Suppress` expression is
    appended to ``ignoreExprs`` only if it is not already present; any other
    expression is copied, wrapped in :class:`Suppress`, and appended.

    :returns: this element

    Example::

        patt = Word(alphas)[1, ...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if isinstance(other, Suppress):
        # avoid registering the same Suppress expression twice
        if other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
    else:
        self.ignoreExprs.append(Suppress(other.copy()))
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching, and
    turn debugging on for this expression. A falsy action is replaced by the
    corresponding default debug action.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
    """
    on_try = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_match = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_fail = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_try, on_match, on_fail)
    self.debug = True
    return self
def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to apply the flag to every expression returned
    by ``visit_all()`` (this expression and all sub-expressions).

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    This output comes from the default debug actions; custom ones can be given
    with :class:`set_debug_actions`. Before each attempt to match ``wd`` the
    message ``"Match <exprname> at loc <n>(<line>,<col>)"`` is shown, followed
    by a ``"Matched"`` message on success or an ``"Exception raised"`` message
    on failure. Using :class:`set_name` to give the expression a readable name
    makes these messages easier to follow - without it, the default name of
    the :class:`Word` expression above would be ``"W:(A-Za-z)"``.
    """
    if recurse:
        for expr in self.visit_all():
            expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
    else:
        # enabling installs the default actions, which also sets self.debug
        self.set_debug_actions(
            _default_start_debug_action,
            _default_success_debug_action,
            _default_exception_debug_action,
        )
    return self
@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    The name is produced by ``_generateDefaultName()`` the first time it is
    needed and cached in ``_defaultName`` until that attribute is reset to
    ``None``.
    """
    if self._defaultName is not None:
        return self._defaultName
    self._defaultName = self._generateDefaultName()
    return self._defaultName
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Build and return the default name for this expression.

    Child classes must define this method; its result is what the
    ``default_name`` property caches and returns.
    """
def set_name(self, name: str) -> "ParserElement":
    """
    Assign a custom name to this expression, so that debugging and exception
    messages are clearer.

    The expression's ``errmsg`` is rebuilt from the new name. If
    ``__diag__.enable_debug_on_named_expressions`` is set, debugging is also
    switched on for this expression.

    Example::

        Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
        Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    Returns this expression, so calls can be chained.
    """
    self.customName = name
    # self.name resolves to the custom name just stored (or to the default
    # name when ``name`` is None), so the message always matches the display name.
    display_name = self.name
    self.errmsg = "Expected " + display_name
    if __diag__.enable_debug_on_named_expressions:
        self.set_debug()
    return self
@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined ``customName`` when one
    has been set, otherwise the auto-generated ``default_name``.
    """
    if self.customName is None:
        return self.default_name
    return self.customName
def __str__(self) -> str:
    """Return this expression's display name (the ``name`` property)."""
    return self.name
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)
def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard its cached default name.

    Returns this expression, so calls can be chained.
    """
    self.streamlined = True
    # Clearing the cache makes ``default_name`` regenerate the name on next access.
    self._defaultName = None
    return self
def recurse(self) -> List["ParserElement"]:
    """
    Return the sub-expressions visited by ``_checkRecursion``.

    This base implementation has none and returns an empty list.
    """
    return []
1918 def _checkRecursion(self, parseElementList):
1919 subRecCheckList = parseElementList[:] + [self]
1920 for e in self.recurse():
1921 e._checkRecursion(subRecCheckList)
def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite
    recursive definitions.

    Deprecated: a ``DeprecationWarning`` is issued on every call. The
    ``validateTrace`` argument is accepted but not used.
    """
    deprecation_message = (
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    )
    # stacklevel=2 attributes the warning to the caller of validate().
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.

    If a filename is specified (instead of a file object), the entire file
    is opened using ``encoding``, read, and closed before parsing.

    ``parse_all`` (or its legacy spelling ``parseAll``) is passed on to
    ``parse_string``; setting either one enables it.
    """
    parseAll = parseAll or parse_all

    # Try the argument as a file object first; anything without a usable
    # ``read`` attribute is treated as a path to open.
    try:
        file_or_filename = typing.cast(TextIO, file_or_filename)
        file_contents = file_or_filename.read()
    except AttributeError:
        file_or_filename = typing.cast(str, file_or_filename)
        with open(file_or_filename, "r", encoding=encoding) as f:
            file_contents = f.read()

    try:
        return self.parse_string(file_contents, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
1964 def __eq__(self, other):
1965 if self is other:
1966 return True
1967 elif isinstance(other, str_type):
1968 return self.matches(other, parse_all=True)
1969 elif isinstance(other, ParserElement):
1970 return vars(self) == vars(other)
1971 return False
def __hash__(self):
    # Hash by object identity. An explicit __hash__ is required because the
    # class defines __eq__, which would otherwise make instances unhashable.
    return id(self)
def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quickly test this parser against a test string. Handy for simple inline
    microtests of sub-expressions while building up a larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
      (converted with ``str()`` before parsing)
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests;
      ``parseAll`` is the legacy spelling, and full-string parsing is used only
      when both are true

    Returns ``True`` if parsing succeeds, ``False`` if a
    ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    parseAll = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword arguments (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are legacy spellings of the parameters above.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` tuples, one per executed test, where ``result`` is
    the :class:`ParseResults` on success or the raised exception on failure

    Failures caused by a :class:`ParseFatalException` are marked with ``(FATAL)``
    in the ``FAIL:`` line of the test output.

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # Merge each snake_case argument with its legacy camelCase synonym.
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse

    # A multiline string is split into one stripped test per line.
    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]

    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
    comment = typing.cast(ParserElement, comment)

    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    comments: List[str] = []
    success = True
    # Matches a literal backslash-n marker (outside quoted strings) so it can
    # be turned into a real newline before parsing.
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    for t in tests:
        # Comment lines, and blank lines that follow pending comments, are
        # collected and emitted ahead of the next real test.
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            "\n" + "\n".join(comments) if comments else "",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            # Flag fatal parse errors in the report so they can be told apart
            # from ordinary parse failures.
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append("FAIL: " + fatal + str(pe))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    # A failing callback must not abort the run: show the
                    # normal dump plus a note describing the callback error.
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path or file-like object) - output target for generated
      diagram HTML; a str or Path is opened for writing as UTF-8, anything else
      is written to via its ``write`` method
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
# Compatibility synonyms
#
# Pre-PEP8 camelCase method names, kept so that existing code keeps working.
# Each stub below has an empty body; the name is bound to whatever
# ``replaced_by_pep8`` returns for the snake_case method passed to it
# (presumably a wrapper that forwards to that method - see
# ``.util.replaced_by_pep8`` to confirm).
# fmt: off
@staticmethod
@replaced_by_pep8(inline_literals_using)
def inlineLiteralsUsing(): ...

@staticmethod
@replaced_by_pep8(set_default_whitespace_chars)
def setDefaultWhitespaceChars(): ...

@replaced_by_pep8(set_results_name)
def setResultsName(self): ...

@replaced_by_pep8(set_break)
def setBreak(self): ...

@replaced_by_pep8(set_parse_action)
def setParseAction(self): ...

@replaced_by_pep8(add_parse_action)
def addParseAction(self): ...

@replaced_by_pep8(add_condition)
def addCondition(self): ...

@replaced_by_pep8(set_fail_action)
def setFailAction(self): ...

@replaced_by_pep8(try_parse)
def tryParse(self): ...

@staticmethod
@replaced_by_pep8(enable_left_recursion)
def enableLeftRecursion(): ...

@staticmethod
@replaced_by_pep8(enable_packrat)
def enablePackrat(): ...

@replaced_by_pep8(parse_string)
def parseString(self): ...

@replaced_by_pep8(scan_string)
def scanString(self): ...

@replaced_by_pep8(transform_string)
def transformString(self): ...

@replaced_by_pep8(search_string)
def searchString(self): ...

@replaced_by_pep8(ignore_whitespace)
def ignoreWhitespace(self): ...

@replaced_by_pep8(leave_whitespace)
def leaveWhitespace(self): ...

@replaced_by_pep8(set_whitespace_chars)
def setWhitespaceChars(self): ...

@replaced_by_pep8(parse_with_tabs)
def parseWithTabs(self): ...

@replaced_by_pep8(set_debug_actions)
def setDebugActions(self): ...

@replaced_by_pep8(set_debug)
def setDebug(self): ...

@replaced_by_pep8(set_name)
def setName(self): ...

@replaced_by_pep8(parse_file)
def parseFile(self): ...

@replaced_by_pep8(run_tests)
def runTests(self): ...

# Plain aliases: these names are bound directly to the same objects as
# their snake_case counterparts, without going through replaced_by_pep8.
canParseNext = can_parse_next
resetCache = reset_cache
defaultName = default_name
# fmt: on
class _PendingSkip(ParserElement):
    # Internal placeholder class that holds the place where '...' was added to a
    # parser element; once another ParserElement is added, this placeholder is
    # replaced with a SkipTo expression.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # Render as "<anchor> ..." by naming the anchor joined with an Empty
        # and substituting the Empty's name.
        combined = self.anchor + Empty()
        return str(combined).replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def drop_empty_skip(t):
            # Nothing (or only an empty string) was skipped: remove the
            # leading token and the "_skipped" results name.
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def report_missing_anchor(t):
            # The last skipped entry is empty: replace "_skipped" with a
            # note naming the anchor expression that was not found.
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = "missing <" + repr(self.anchor) + ">"

        anchor_then_skip = self.anchor + skipper().add_parse_action(drop_empty_skip)
        skip_only = skipper().add_parse_action(report_missing_anchor)
        return (anchor_then_skip | skip_only) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # A pending skip is only a placeholder and can never be parsed itself.
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # By default a token is named after its concrete class.
        cls = type(self)
        return cls.__name__
class NoMatch(Token):
    """
    A token that never matches: parsing it always raises a
    :class:`ParseException`.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        # Unconditional failure at the current location.
        raise ParseException(instring, loc, self.errmsg, self)
class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Subclasses are always instantiated as themselves.
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: for a plain Literal, pick a subclass with an
        # optimized parseImpl based on the length of the match string.
        match_string = matchString or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        literal_text = matchString or match_string
        self.match = literal_text
        self.matchLen = len(literal_text)
        # Slicing (rather than indexing) yields "" for an empty match string.
        self.firstMatchChar = literal_text[:1]
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # Compare the first character before doing the full startswith test.
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class Empty(Literal):
    """
    An empty token, which always matches without consuming any input.
    """

    def __init__(self, match_string="", *, matchString=""):
        # Both arguments are ignored; the underlying literal is always "".
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, doActions=True):
        # Succeed at the current location and contribute no tokens.
        return loc, []
class _SingleCharLiteral(Literal):
    # Literal specialization for one-character match strings: a single
    # character comparison replaces the startswith() test.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Register Literal on ParserElement as its ``_literalStringClass``; this has to
# be done here because Literal is only defined at this point in the module.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Raises ``ValueError`` if the keyword string is empty.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # Merge the snake_case arguments with their legacy camelCase synonyms.
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparisons are done in upper case on both sides.
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at ``loc``, requiring that it is neither preceded
        nor followed by one of the identifier characters.

        Returns ``(new_loc, matched_keyword)`` on success; otherwise raises
        :class:`ParseException`, with a message and location that say whether
        the text did not match or the keyword ran into an adjacent
        identifier character.
        """
        errmsg = self.errmsg
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match
                    else:
                        # followed by keyword char
                        # (wording kept identical to the case-sensitive branch)
                        errmsg += (
                            ", keyword was immediately followed by keyword character"
                        )
                        errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        else:
            if (
                instring[loc] == self.firstMatchChar
                and self.matchLen == 1
                or instring.startswith(self.match, loc)
            ):
                if loc == 0 or instring[loc - 1] not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen] not in self.identChars
                    ):
                        return loc + self.matchLen, self.match
                    else:
                        # followed by keyword char
                        errmsg += (
                            ", keyword was immediately followed by keyword character"
                        )
                        errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Legacy camelCase alias of set_default_keyword_chars.
    setDefaultKeywordChars = set_default_keyword_chars
class CaselessLiteral(Literal):
    """
    Token that matches a specified string without regard to letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        match_string = matchString or match_string
        # The base class stores the upper-cased text as ``self.match``, which
        # is what parseImpl compares the upper-cased input against.
        super().__init__(match_string.upper())
        # Preserve the defining literal.
        self.returnString = match_string
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        candidate = instring[loc : loc + self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString
class CaselessKeyword(Keyword):
    """
    Case-insensitive version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # Resolve the legacy camelCase synonyms, then defer to Keyword with
        # caseless matching switched on.
        keyword_text = matchString or match_string
        keyword_chars = identChars or ident_chars
        super().__init__(keyword_text, keyword_chars, caseless=True)
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # ``max_mismatches`` takes precedence over the legacy ``maxMismatches``
        # whenever it is given explicitly (including an explicit 0).
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare the input at ``loc`` with ``match_string`` character by
        character, tolerating up to ``maxMismatches`` differences.

        Returns ``(new_loc, results)`` where ``results`` holds the matched
        input text plus the ``original`` and ``mismatches`` named results.
        Raises :class:`ParseException` if the remaining input is shorter than
        ``match_string`` or the number of mismatches exceeds the limit.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # Only attempt a match if enough input remains for the whole string.
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if self.caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # Loop ran to completion, so exactly len(match_string)
                # characters were compared. Using maxloc directly also keeps
                # the position correct when match_string is empty.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2743class Word(Token):
2744 """Token for matching words composed of allowed character sets.
2746 Parameters:
2748 - ``init_chars`` - string of all characters that should be used to
2749 match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
2750 if ``body_chars`` is also specified, then this is the string of
2751 initial characters
2752 - ``body_chars`` - string of characters that
2753 can be used for matching after a matched initial character as
2754 given in ``init_chars``; if omitted, same as the initial characters
2755 (default=``None``)
2756 - ``min`` - minimum number of characters to match (default=1)
2757 - ``max`` - maximum number of characters to match (default=0)
2758 - ``exact`` - exact number of characters to match (default=0)
2759 - ``as_keyword`` - match as a keyword (default=``False``)
2760 - ``exclude_chars`` - characters that might be
2761 found in the input ``body_chars`` string but which should not be
2762 accepted for matching ;useful to define a word of all
2763 printables except for one or two characters, for instance
2764 (default=``None``)
2766 :class:`srange` is useful for defining custom character set strings
2767 for defining :class:`Word` expressions, using range notation from
2768 regular expression character sets.
2770 A common mistake is to use :class:`Word` to match a specific literal
2771 string, as in ``Word("Address")``. Remember that :class:`Word`
2772 uses the string argument to define *sets* of matchable characters.
2773 This expression would match "Add", "AAA", "dAred", or any other word
2774 made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
2775 exact literal string, use :class:`Literal` or :class:`Keyword`.
2777 pyparsing includes helper strings for building Words:
2779 - :class:`alphas`
2780 - :class:`nums`
2781 - :class:`alphanums`
2782 - :class:`hexnums`
2783 - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
2784 - accented, tilded, umlauted, etc.)
2785 - :class:`punc8bit` (non-alphabetic characters in ASCII range
2786 128-255 - currency, symbols, superscripts, diacriticals, etc.)
2787 - :class:`printables` (any non-whitespace character)
2789 ``alphas``, ``nums``, and ``printables`` are also defined in several
2790 Unicode sets - see :class:`pyparsing_unicode``.
2792 Example::
2794 # a word composed of digits
2795 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
2797 # a word with a leading capital, and zero or more lowercase
2798 capital_word = Word(alphas.upper(), alphas.lower())
2800 # hostnames are alphanumeric, with leading alpha, and '-'
2801 hostname = Word(alphas, alphanums + '-')
2803 # roman numeral (not a strict parser, accepts invalid mix of characters)
2804 roman = Word("IVXLCDM")
2806 # any string of non-whitespace characters, except for ','
2807 csv_value = Word(printables, exclude_chars=",")
2808 """
def __init__(
    self,
    init_chars: str = "",
    body_chars: typing.Optional[str] = None,
    min: int = 1,
    max: int = 0,
    exact: int = 0,
    as_keyword: bool = False,
    exclude_chars: typing.Optional[str] = None,
    *,
    initChars: typing.Optional[str] = None,
    bodyChars: typing.Optional[str] = None,
    asKeyword: bool = False,
    excludeChars: typing.Optional[str] = None,
):
    """Record the character sets and length limits for this word and,
    when neither set contains a space, pre-compile an equivalent regex.

    - ``init_chars`` - characters accepted in the first position
      (required, must be non-empty)
    - ``body_chars`` - characters accepted after the first position;
      the initial characters are reused when this is omitted or empty
    - ``min``, ``max``, ``exact`` - length limits; ``max=0`` and
      ``exact=0`` mean "not specified", and ``exact`` overrides both
      ``min`` and ``max``
    - ``as_keyword`` - stored as ``self.asKeyword``; adds ``\\b``
      anchors to the generated regex
    - ``exclude_chars`` - characters removed from both character sets

    The camelCase keyword arguments are the pre-PEP8 spellings; a truthy
    camelCase value takes precedence over its snake_case counterpart.

    Raises ``ValueError`` if no initial characters are given, if
    ``min < 1``, or if ``max`` is specified and ``min > max``.
    """
    initChars = initChars or init_chars
    bodyChars = bodyChars or body_chars
    asKeyword = asKeyword or as_keyword
    excludeChars = excludeChars or exclude_chars
    super().__init__()
    if not initChars:
        raise ValueError(
            f"invalid {type(self).__name__}, initChars cannot be empty string"
        )

    leading = set(initChars)
    if excludeChars:
        banned = set(excludeChars)
        leading -= banned
        if bodyChars:
            bodyChars = "".join(set(bodyChars) - banned)
    self.initChars = leading
    self.initCharsOrig = "".join(sorted(leading))

    if bodyChars:
        self.bodyChars = set(bodyChars)
        self.bodyCharsOrig = "".join(sorted(bodyChars))
    else:
        # no separate body set: share the very same set object
        self.bodyChars = leading
        self.bodyCharsOrig = self.initCharsOrig

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError(
            "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
        )

    if self.maxSpecified and min > max:
        raise ValueError(
            f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
        )

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT

    if exact > 0:
        # an exact length overrides whatever min/max were given
        min = max = exact
        self.minLen = self.maxLen = exact

    # self.name is derived from the attributes assigned above
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword
    if asKeyword:
        self.errmsg += " as a keyword"

    # see if we can make a regex for this Word
    if " " in (self.initChars | self.bodyChars):
        return

    if len(self.initChars) == 1:
        head_re = re.escape(self.initCharsOrig)
    else:
        head_re = f"[{_collapse_string_to_ranges(self.initChars)}]"

    if self.bodyChars == self.initChars:
        # a single character class repeated over the whole word
        if max == 0 and self.minLen == 1:
            repeat = "+"
        elif max == 1:
            repeat = ""
        elif self.minLen != self.maxLen:
            upper = "" if self.maxLen == _MAX_INT else self.maxLen
            repeat = f"{{{self.minLen},{upper}}}"
        else:
            repeat = f"{{{self.minLen}}}"
        self.reString = f"{head_re}{repeat}"
    else:
        # leading class once, then the body class; the body counts are
        # one less than the word length limits
        if max == 1:
            body_re = repeat = ""
        else:
            body_re = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
            if max == 0 and self.minLen == 1:
                repeat = "*"
            elif max == 2:
                repeat = "?" if min <= 1 else ""
            else:
                lower = min - 1 if min > 0 else ""
                upper = max - 1 if max > 0 else ""
                if min != max:
                    repeat = f"{{{lower},{upper}}}"
                else:
                    repeat = f"{{{lower}}}"

        self.reString = f"{head_re}{body_re}{repeat}"

    if self.asKeyword:
        self.reString = rf"\b{self.reString}\b"

    try:
        self.re = re.compile(self.reString)
    except re.error:
        # fall back to the character-by-character parseImpl
        self.re = None  # type: ignore[assignment]
    else:
        self.re_match = self.re.match
        self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]
def _generateDefaultName(self) -> str:
    """Build the default display name, e.g. ``W:(A-Z, a-z){2,5}``,
    from the character sets and the length limits."""

    def abbreviate(chars):
        # collapse into ranges and truncate long descriptions to 16 chars
        limit = 16
        text = _collapse_string_to_ranges(chars, re_escape=False)
        return text if len(text) <= limit else text[: limit - 3] + "..."

    if self.initChars == self.bodyChars:
        base = f"W:({abbreviate(self.initChars)})"
    else:
        base = f"W:({abbreviate(self.initChars)}, {abbreviate(self.bodyChars)})"

    # no length restriction to report
    if self.minLen <= 1 and self.maxLen == _MAX_INT:
        return base

    # add length specification
    if self.minLen == self.maxLen:
        # a single-character word drops the leading "W:"
        return base[2:] if self.minLen == 1 else base + f"{{{self.minLen}}}"
    if self.maxLen == _MAX_INT:
        return base + f"{{{self.minLen},...}}"
    return base + f"{{{self.minLen},{self.maxLen}}}"
def parseImpl(self, instring, loc, doActions=True):
    """Match a word at ``loc`` by scanning characters one at a time.

    Returns ``(end_loc, matched_text)``; raises ``ParseException`` if
    the first character is not an initial character, the run is shorter
    than ``minLen``, a specified maximum is followed by another body
    character, or (keyword mode) the word abuts a body character.
    """
    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    text_len = len(instring)
    body = self.bodyChars
    limit = min(start + self.maxLen, text_len)

    loc += 1
    while loc < limit and instring[loc] in body:
        loc += 1

    more_body_follows = loc < text_len and instring[loc] in body
    rejected = (
        loc - start < self.minLen
        or (self.maxSpecified and more_body_follows)
        or (
            self.asKeyword
            and ((start > 0 and instring[start - 1] in body) or more_body_follows)
        )
    )
    if rejected:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
def parseImpl_regex(self, instring, loc, doActions=True):
    """Match a word at ``loc`` using the regex compiled in ``__init__``.

    Returns ``(end_loc, matched_text)`` or raises ``ParseException``.
    """
    match = self.re_match(instring, loc)
    if not match:
        raise ParseException(instring, loc, self.errmsg, self)

    return match.end(), match.group()
class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        """Build a one-character :class:`Word` over ``charset``.

        A truthy camelCase keyword (pre-PEP8 spelling) takes precedence
        over its snake_case counterpart.
        """
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already-compiled object exposing
        ``pattern`` and ``match`` attributes. ``as_group_list`` makes the
        token return ``match.groups()``; ``as_match`` makes it return the
        match object itself (and wins if both are set). The camelCase
        keywords are the pre-PEP8 spellings of the same options.

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        for any other unsupported ``pattern`` argument.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first used
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled pattern (stdlib ``re`` or a look-alike)
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]

    @cached_property
    def re(self):
        """The compiled pattern; compiled on first access when the
        ``Regex`` was built from a string.

        Raises ``ValueError`` (chained to the underlying ``re.error``) if
        the pattern does not compile.
        """
        if self._re:
            return self._re
        try:
            return re.compile(self.pattern, self.flags)
        except re.error as err:
            # keep the original error as __cause__ so the position and
            # reason reported by the re module are not lost
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """True if the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

    def parseImpl(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and a
        :class:`ParseResults` holding the matched text, with each named
        group also stored under its name."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Variant of ``parseImpl`` that returns ``match.groups()``."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Variant of ``parseImpl`` that returns the match object itself."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or when
        ``repl`` is a callable and the ``Regex`` was built with ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # tokens[0] is the match object in as_match mode
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None``  => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    # escaped-whitespace spellings and the characters they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """Build the matching regex (and, when unquoting, the scanner
        regex) from the quoting options described in the class docstring.

        The camelCase keywords are the pre-PEP8 spellings. For the
        string-valued options a truthy camelCase value wins; the two
        boolean options are enabled only if both spellings are true.

        Raises ``ValueError`` if either quote string is empty after
        stripping whitespace, or if the generated pattern fails to compile.
        """
        super().__init__()
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow proper prefixes of a multi-character end quote as
            # content, as long as the rest of the end quote does not follow
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        # any single character except the first end-quote character, line
        # breaks (unless multiline) and the escape character
        excluded = _escape_regex_range_chars(self.end_quote_char[0])
        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
        else:
            excluded += r"\n\r"
        if self.has_esc_char:
            excluded += _escape_regex_range_chars(esc_char)
        inner_pattern.append(rf"(?:[^{excluded}])")

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                # groups: 1 = escaped whitespace, 2 = escaped char, 3 = any char
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                # groups: 1 = escaped char, 2 = any char
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
        except re.error as err:
            # name the right class and keep the re.error as the cause
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to QuotedString"
            ) from err
        self.reString = self.pattern
        self.re_match = self.re.match

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(end_loc, text)`` where ``text`` is the raw match, or
        the unquoted and unescaped content when ``unquote_results`` is set.
        Raises ``ParseException`` when no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        if instring[loc] == self.first_quote_char:
            result = self.re_match(instring, loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # as we iterate over matches in the input string,
                # collect from whichever match group of the unquote_scan_re
                # regex matches (only 1 group will match at any given time)
                if self.convert_whitespace_escapes:

                    def unescape(match):
                        ws, escaped, plain = match.groups()
                        if ws:
                            return self.ws_map[ws]
                        # an escaped character is replaced by the character itself
                        return escaped[-1] if escaped else plain

                else:

                    def unescape(match):
                        escaped, plain = match.groups()
                        return escaped[-1] if escaped else plain

                ret = "".join(map(unescape, self.unquote_scan_re.finditer(ret)))

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length.  The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        """Store the excluded characters and the length limits.

        ``notChars`` is the pre-PEP8 spelling and is only used when
        ``not_chars`` is empty. Raises ``ValueError`` if ``min < 1``.
        """
        super().__init__()
        # whitespace is ordinary content for this token
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both limits
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        collapsed = _collapse_string_to_ranges(self.notChars)
        # the collapsed form only decides whether to truncate; the raw
        # character string is what gets displayed
        shown = self.notChars
        if len(collapsed) > 16:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of permitted characters starting at ``loc``.

        Returns ``(end_loc, matched_text)``; raises ``ParseException`` if
        the first character is excluded or the run is shorter than
        ``minLen``.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when building the default name of a White element
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        """Match the whitespace characters in ``ws``; every other
        character listed in ``whiteStrs`` is still skipped before
        matching. ``exact`` overrides ``min`` and ``max``; ``max=0``
        means no upper limit."""
        super().__init__()
        self.matchWhite = ws
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        # self.leave_whitespace()
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        labels = White.whiteStrs
        return "".join(labels[c] for c in self.matchWhite)

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters at
        ``loc``; returns ``(end_loc, matched_text)`` or raises
        ``ParseException``."""
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class PositionToken(Token):
    """Base class for tokens that test the parse position; instances are
    created with ``mayReturnEmpty`` set and ``mayIndexError`` cleared."""

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and whitespace until the target
        column (or a non-space character, or the end of input) is reached."""
        if col(loc, instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        end = len(instring)
        while (
            loc < end
            and instring[loc].isspace()
            and col(loc, instring) != self.col
        ):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column; raises
        ``ParseException`` if ``loc`` is already past that column."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]
class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set() | self.whiteChars
        self.whiteChars.discard("\n")
        # helper element used only to skip the remaining whitespace chars
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring: str, loc: int) -> int:
        """Skip non-newline whitespace, and also newlines when ``"\n"``
        was originally a whitespace character; position 0 is returned
        unchanged."""
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # "\n" is what this token matches, so it must not be skipped
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc + 1, "\\n")`` at a newline and ``(loc + 1, [])``
        exactly at the end of input; otherwise raise ``ParseException``."""
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # a non-zero loc still counts as the start if entire string up to
        # here is just whitespace and ignoreables
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed only at or beyond the end of ``instring``.

        Returns ``(loc + 1, [])`` exactly at the end and ``(loc, [])``
        when ``loc`` is already past it; raises ``ParseException`` while
        input remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc + 1, []
        # loc > instrlen is the only remaining case
        return loc, []
class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        """``wordChars`` is the pre-PEP8 spelling; it is used only when
        it differs from the default ``printables``."""
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc, [])`` at position 0, or when the previous
        character is not a word character and the current one is;
        otherwise raise ``ParseException``."""
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        """``wordChars`` is the pre-PEP8 spelling; it is used only when
        it differs from the default ``printables``."""
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc, [])`` at or past the end of the input, or when
        the previous character is a word character and the current one
        is not; otherwise raise ``ParseException``."""
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # at position 0 there is no previous character; without
                # this guard instring[-1] would wrap around to the last
                # character of the input
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepts a single string, a single :class:`ParserElement`, a
        generator, or any iterable; strings are wrapped with
        ``self._literalStringClass``. A non-iterable argument becomes a
        one-element list.
        """
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # if sequence of strings provided, wrap with Literal
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in list(exprs)
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and reset the
        cached default name."""
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if not recursive:
            return self
        # work on copies so expressions shared with other parsers are untouched
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self
        # work on copies so expressions shared with other parsers are untouched
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained
        expression; a :class:`Suppress` that is already registered is
        not added again."""
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{self.__class__.__name__}:({str(self.exprs)})"

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions and flatten a directly
        nested expression of the same class into this one."""
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def can_absorb(candidate):
            # only plain nested expressions of the same class may be
            # merged: no parse actions, no results name, no debugging
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if can_absorb(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            last = self.exprs[-1]
            if can_absorb(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        self.errmsg = "Expected " + str(self)

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then validates
        each contained expression and checks for recursion."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        tmp = trace[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Copy this expression together with copies of its contained
        expressions."""
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, first warning (when the diagnostic is
        enabled and not suppressed) about contained expressions that
        already carry a results name."""
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for e in self.exprs:
                if not (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and diag not in e.suppress_warnings_
                ):
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        e.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: once ``And.parseImpl`` has passed one of these,
        failures of later expressions are raised as ``ParseSyntaxException``."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence, replacing each ``...`` item with a
        ``SkipTo`` targeting the item that follows it.

        Raises ``Exception`` if the sequence ends with ``...``.
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            final_index = len(exprs) - 1
            expanded = []
            for position, item in enumerate(exprs):
                if item is not Ellipsis:
                    expanded.append(item)
                    continue
                if position >= final_index:
                    raise Exception(
                        "cannot construct And with sequence ending in ..."
                    )
                # let '+' convert the following item, then skip up to it
                following: ParserElement = typing.cast(
                    ParseExpression, (Empty() + exprs[position + 1])
                ).exprs[-1]
                expanded.append(SkipTo(following)("_skipped*"))
            exprs[:] = expanded
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            leading = self.exprs[0]
            if isinstance(leading, White):
                self.skipWhitespace = False
            else:
                # inherit whitespace handling from the first expression
                self.set_whitespace_chars(
                    leading.whiteChars,
                    copy_defaults=leading.copyDefaultWhiteChars,
                )
                self.skipWhitespace = leading.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips into their following expression, streamline
        via the parent class, attach anchor-recording parse actions ahead of
        any ``IndentedBlock``, and recompute ``mayReturnEmpty``."""

        def ends_in_pending_skip(candidate) -> bool:
            return bool(
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_in_pending_skip(e) for e in self.exprs[:-1]):
            removed = NoMatch()
            # iterate over a snapshot; self.exprs itself is edited as we go
            for position, e in enumerate(self.exprs[:-1]):
                if e is removed:
                    continue
                if ends_in_pending_skip(e):
                    e.exprs[-1] = e.exprs[-1] + self.exprs[position + 1]
                    self.exprs[position + 1] = removed
            self.exprs = [e for e in self.exprs if e is not removed]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # walk cur, then the first sub-expression of cur, and so on, looking
            # for an IndentedBlock; ids already visited guard against cycles
            visited = set()
            while id(cur) not in visited:
                visited.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                next_first = next(iter(cur.recurse()), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order, accumulating tokens.

        Returns ``(loc, tokens)``.  After an ``_ErrorStop`` marker has been
        seen, parse failures and ``IndexError`` are re-raised as
        ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        past_error_stop = False
        for e in self.exprs[1:]:
            if type(e) is And._ErrorStop:
                past_error_stop = True
                continue
            if not past_error_stop:
                loc, exprtokens = e._parse(instring, loc, doActions)
            else:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support ``expr += other`` by appending ``other`` to this sequence."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        """Check contained expressions for recursion, stopping after the
        first one that cannot return empty."""
        sub_list = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(sub_list)
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        """Return the contained names joined by spaces inside one pair of braces."""
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return "{" + inner + "}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline via the parent class, then refresh the flags derived
        from the alternatives."""
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and return the result of the longest match.

        Returns ``(loc, tokens)``.  Raises the collected
        ``ParseFatalException`` with the greatest location if any alternative
        raised one and nothing matched, otherwise the ``ParseException``
        with the greatest location.
        """
        deepest_loc = -1
        deepest_exc = None
        matches = []
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                end_loc = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error discards any non-fatal error seen so far
                deepest_exc = None
                deepest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > deepest_loc:
                        deepest_exc = err
                        deepest_loc = err.loc
            except IndexError:
                if len(instring) > deepest_loc:
                    deepest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    deepest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((end_loc, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, doActions)

            longest = -1, None
            for expected_end, candidate in matches:
                if expected_end <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_end, toks = candidate._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > deepest_loc:
                        deepest_exc = err
                        deepest_loc = err.loc
                else:
                    if actual_end >= expected_end:
                        return actual_end, toks
                    # didn't match as much as before
                    elif actual_end > longest[0]:
                        longest = actual_end, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer element description
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if deepest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )
        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any single error message
        if deepest_loc == loc:
            deepest_exc.msg = self.errmsg
        raise deepest_exc

    def __ixor__(self, other):
        """Support ``expr ^= other`` by appending ``other`` as an alternative."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " ^ ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        """Set a results name, warning once if any alternative is an ``And``
        and the ``warn_multiple_tokens_in_named_alternation`` diagnostic is
        enabled and not suppressed."""
        diagnostic = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diagnostic not in self.suppress_warnings_
        ):
            has_and_alternative = any(
                isinstance(e, And) and diagnostic not in e.suppress_warnings_
                for e in self.exprs
            )
            if has_and_alternative:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once (no-op if already streamlined), then refresh the
        flags derived from the alternatives."""
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` from any alternative propagates at once.
        If every alternative fails, the ``ParseException`` with the greatest
        location is raised.
        """
        deepest_loc = -1
        deepest_exc = None

        for e in self.exprs:
            try:
                return e._parse(
                    instring,
                    loc,
                    doActions,
                )
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                raise
            except ParseException as err:
                if err.loc > deepest_loc:
                    deepest_exc = err
                    deepest_loc = err.loc
            except IndexError:
                if len(instring) > deepest_loc:
                    deepest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    deepest_loc = len(instring)

        if deepest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )
        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if deepest_loc == loc:
            deepest_exc.msg = self.errmsg
        raise deepest_exc

    def __ior__(self, other):
        """Support ``expr |= other`` by appending ``other`` as an alternative."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " | ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        """Set a results name, warning once if any alternative is an ``And``
        and the ``warn_multiple_tokens_in_named_alternation`` diagnostic is
        enabled and not suppressed."""
        diagnostic = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diagnostic not in self.suppress_warnings_
        ):
            has_and_alternative = any(
                isinstance(e, And) and diagnostic not in e.suppress_warnings_
                for e in self.exprs
            )
            if has_and_alternative:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        # expression groups are computed lazily on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Support ``expr &= other`` by appending ``other`` to this set."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline via the parent class and recompute ``mayReturnEmpty``."""
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order they occur.

        A first phase uses ``try_parse`` repeatedly to discover the order in
        which expressions match; a second phase re-parses them in that order
        to build the combined results.  Returns ``(loc, ParseResults)``.
        Raises the ``ParseFatalException`` with the greatest location if any
        occurred in the final matching round, or ``ParseException`` when
        required expressions remain unmatched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        multis = self.multioptionals[:]
        match_order = []

        failed = []
        fatals = []
        while True:
            candidates = pending_required + pending_optional + multis
            failed.clear()
            fatals.clear()
            for e in candidates:
                try:
                    scan_loc = e.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the enclosing Opt (if any) rather than its inner expr
                    match_order.append(self.opt1map.get(id(e), e))
                    if e in pending_required:
                        pending_required.remove(e)
                    elif e in pending_optional:
                        pending_optional.remove(e)
            # stop once a full round matches nothing
            if len(failed) == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        total_results = ParseResults([])
        for e in match_order:
            loc, results = e._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return "{" + " & ".join(str(e) for e in self.exprs) + "}"
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        """Wrap ``expr`` (strings are first converted to an expression) and
        copy its parsing flags, whitespace settings and ignore expressions."""
        super().__init__(savelist)
        if isinstance(expr, str_type):
            expr_str = typing.cast(str, expr)
            literal_class = self._literalStringClass
            if issubclass(literal_class, Token):
                expr = literal_class(expr_str)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_class):
                expr = Literal(expr_str)
            else:
                expr = literal_class(Literal(expr_str))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return
        self.mayIndexError = expr.mayIndexError
        self.mayReturnEmpty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> List[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate parsing to the wrapped expression.

        Raises ``ParseException`` if no expression is defined.  On failure
        of the wrapped expression the exception message is replaced with
        ``self.errmsg``, except for a ``Forward`` without a custom name.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)
        try:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        except ParseBaseException as pbe:
            if not isinstance(self, Forward) or self.customName is not None:
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy so the original wrapped expression is untouched
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy so the original wrapped expression is untouched
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the wrapped expression.

        A ``Suppress`` that is already present in ``ignoreExprs`` is not
        registered a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if ``self`` is already in the
        list; otherwise continue the check in the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        sub_list = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(sub_list)

    def validate(self, validateTrace=None) -> None:
        """Deprecated: warn, validate the wrapped expression, then run
        ``self._checkRecursion([])``."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = ([] if validateTrace is None else validateTrace)[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{self.__class__.__name__}:({str(self.expr)})"

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Empty match that only succeeds at column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that only succeeds at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        # if recursive:
        #     raise NotImplementedError("IndentedBlock with recursive is not implemented")
        self._recursive = recursive
        self._grouped = grouped
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        """Build and run a one-off parser for the block starting at ``loc``.

        The column of the first non-whitespace character becomes the
        indentation that every entry of the block must share.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=doActions)

        indent_col = col(anchor_loc, instring)
        same_indent = self._Indent(indent_col)

        inner_expr = Empty() + same_indent + self.expr
        if self._recursive:
            deeper_indent = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_indent + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, doActions
        )
class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, but only at location 0."""
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")
class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, but only when ``loc`` is at column 1."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression at ``loc`` and return the original
        ``loc`` together with the results emptied of list items."""
        # by using self._expr.parse and deleting the contents of the returned ParseResults list
        # we keep any named results that were defined in the FollowedBy expression
        _end_loc, lookahead_results = self.expr._parse(
            instring, loc, doActions=doActions
        )
        del lookahead_results[:]

        return loc, lookahead_results
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # ``exact`` means the lookbehind distance is taken from the expression
        # itself, overriding any ``retreat`` argument
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # parse action that empties the token list of the results
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Verify that the wrapped expression matches just before ``loc``.

        Returns ``(loc, results)`` with ``loc`` unchanged.

        Raises ``ParseException`` when no lookbehind match is found, and
        ``TypeError`` when the lookbehind distance could not be derived from
        the expression and no ``retreat`` was given.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            if self.retreat is None:
                # previously this surfaced as an opaque "int - NoneType"
                # TypeError from the slice arithmetic below
                raise TypeError(
                    "PrecededBy requires a 'retreat' value when the lookbehind"
                    " expression has no fixed or maximum length"
                )
            # retreat specified a maximum lookbehind window, iterate
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat) : loc]
            last_expr = ParseException(instring, loc, self.errmsg)
            for offset in range(1, min(loc, self.retreat + 1) + 1):
                try:
                    # print('trying', offset, instring_slice, repr(instring_slice[loc - offset:]))
                    _, ret = test_expr._parse(
                        instring_slice, len(instring_slice) - offset
                    )
                except ParseBaseException as pbe:
                    last_expr = pbe
                else:
                    break
            else:
                # no offset produced a match: re-raise the last failure
                raise last_expr
        return loc, ret
class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression and return its tokens bracketed by
        the start and end locations."""
        start = loc
        end, tokens = self.expr._parse(instring, start, doActions, callPreParse=False)
        located = ParseResults([start, tokens, end])
        located["locn_start"] = start
        located["value"] = tokens
        located["locn_end"] = end
        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]
class NotAny(ParseElementEnhance):
    """
    Negative lookahead: succeeds only when the given parse expression
    does *not* match at the current position.  ``NotAny`` does *not*
    advance the parsing position within the input string, and it does
    *not* skip over leading whitespace.  ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # set the flag directly instead of calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = f"Found unwanted token, {self.expr!s}"

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc, [])`` unless the contained expression can parse at
        ``loc``, in which case raise ``ParseException``."""
        unwanted_present = self.expr.can_parse_next(
            instring, loc, do_actions=doActions
        )
        if not unwanted_present:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return f"~{{{self.expr!s}}}"
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition elements: matches the contained
    expression one or more times, optionally halting in front of a
    ``stop_on`` sentinel expression."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        # the legacy camelCase keyword wins when both spellings are given
        stopOn = stopOn or stop_on
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel expression that ends the
        repetition; returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated, so that a successful parse means "no sentinel here"
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression repeatedly, accumulating tokens.

        The first match is mandatory (its exception propagates); later
        failures simply end the repetition.
        """
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        sentinel_guard = self.not_ender
        reject_sentinel = (
            sentinel_guard.try_parse if sentinel_guard is not None else None
        )

        # must be at least one (but first see if we are at the stopOn
        # sentinel; if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, doActions)
        try:
            skip_needed = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                next_loc = skip_ignorables(instring, loc) if skip_needed else loc
                loc, more_tokens = parse_item(instring, next_loc, doActions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # no further repetition could be matched - keep what we have
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Attach a results name, first warning (when the diagnostic is
        enabled) about contained expressions that already carry a name."""
        warning_kind = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and warning_kind not in self.suppress_warnings_
        ):
            for contained in [self.expr] + self.expr.recurse():
                if not isinstance(contained, ParserElement):
                    continue
                if not contained.resultsName:
                    continue
                if warning_kind in contained.suppress_warnings_:
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        contained.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        return f"{{{self.expr!s}}}..."
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr, stopOn=stopOn or stop_on)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Behave like the one-or-more repetition, but turn a failure of the
        very first match into an empty result at the unchanged location."""
        try:
            outcome = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        return f"[{self.expr!s}]..."
class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        ``min`` and ``max``, when given, bound the number of list elements;
        ``min`` must be at least 1 and ``max`` must not be less than ``min``
        (``ValueError`` is raised otherwise).

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")
        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        # delimiters are kept in the output only when combining
        self.delim = delim if combine else Suppress(delim)
        self.combine = combine
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # first element, then (delimiter + element) repeated within bounds
        fewest_repeats = self.min - 1
        most_repeats = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            fewest_repeats,
            most_repeats,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        element = self.content.streamline()
        return f"{element} [{self.raw_delim} {element}]..."
5175class _NullToken:
5176 def __bool__(self):
5177 return False
5179 def __str__(self):
5180 return ""
class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match zero times or once
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # private sentinel meaning "no default was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the contained expression; when it fails, return the default
        value (if one was given) or no tokens, without advancing ``loc``."""
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif inner.resultsName:
                # keep the default reachable under the inner expression's name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        text = str(self.expr)
        # strip off redundant inner {}'s
        while len(text) > 1 and text[0] == "{" and text[-1] == "}":
            text = text[1:-1]
        return f"[{text}]"


Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        # the legacy camelCase keyword wins when both spellings are given
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)
        # helper element that only carries the ignore expressions used while
        # scanning forward for the target
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's ignore
        expressions plus the ``ignore`` argument given to the constructor."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register an ignorable expression and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained, matching the other
        ``ignore`` implementations in this module.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the end location and a ``ParseResults`` holding the skipped
        text (plus the target's tokens when ``include`` was requested).
        Raises ``ParseException`` if the end of the input is passed without
        the target matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        fail_on_can_parse_next = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if fail_on_can_parse_next is not None:
                # break if failOn expression matches
                # NOTE(review): this leaves the loop without raising, so the
                # text up to the fail_on position is returned as skipped text;
                # confirm this matches the documented ``fail_on`` contract
                if fail_on_can_parse_next(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target, this time honoring doActions
            loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # remember where this Forward was created, for the warning issued
        # by __del__ when no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        self.lshift_line = None

    def __lshift__(self, other) -> "Forward":
        """Attach ``other`` as the expression this ``Forward`` stands for and
        adopt its parsing attributes; returns ``self``.

        Returns ``NotImplemented`` (leaving this element untouched) when
        ``other`` is neither a string nor a ``ParserElement``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # only drop the creation frame once the operand has been accepted;
        # a rejected operand leaves this Forward empty, and __del__ still
        # needs caller_frame to report it
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # remember the caller's line so that __or__ can detect `fwd << a | b`
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> "Forward":
        """In-place form of ``<<``; non-``ParserElement`` operands yield
        ``NotImplemented`` so Python falls back to ``__lshift__``."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> "ParserElement":
        """Build an alternation, warning (when the diagnostic is enabled) if
        it occurs on the same source line as the ``<<`` that filled this
        ``Forward``."""
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, doActions=True):
        """Parse using the attached expression.

        Optionally warns when no expression was ever attached.  When left
        recursion support is enabled, the bounded-recursion algorithm
        described in the comments below is used instead of a plain
        delegation to the base class.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, doActions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `doActions=False` and only run `doActions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, doActions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `doActions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if doActions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match – do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if doActions:
                            # replace the match for doActions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, prev_result.copy()
                        del memo[peek_key]
                        return prev_loc, prev_peek.copy()
                    # the match did get better: see if we can improve further
                    else:
                        if doActions:
                            try:
                                memo[act_key] = super().parseImpl(instring, loc, True)
                            except ParseException as e:
                                memo[peek_key] = memo[act_key] = (new_loc, e)
                                raise
                        prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Disable whitespace skipping on this element only; ``recursive``
        is accepted but not used here."""
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Enable whitespace skipping on this element only; ``recursive``
        is accepted but not used here."""
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        # the flag is set before descending, so a recursive grammar that
        # reaches this element again stops here
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then runs the
        legacy recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        retString = "..."
        try:
            if self.expr is not None:
                retString = str(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            # returning from ``finally`` discards any exception raised while
            # rendering the contained expression; "..." is used in that case
            return self.__class__.__name__ + ": " + retString

    def copy(self) -> ParserElement:
        """Copy this element; an empty ``Forward`` is copied as a new
        ``Forward`` that refers back to this one."""
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False):
        """Attach a results name, warning first (when the diagnostic is
        enabled) if no expression has been attached yet."""
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward
            not in self.suppress_warnings_
        ):
            if self.expr is None:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "that has no contained expression".format(
                        "warn_name_set_on_empty_Forward", name, type(self).__name__
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted for signature compatibility but is not
        # forwarded; converters start out with saveAsList disabled
        super().__init__(expr)
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the legacy camelCase keyword, when given, overrides join_string
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but
        # re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register an ignorable expression; for an adjacent ``Combine`` the
        base ``ParserElement`` implementation is used directly.  Returns
        ``self``."""
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their single concatenated string,
        keeping any results names held by ``tokenlist``."""
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        return [combined] if self.resultsName and combined.haskeys() else combined
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper, as a plain Python list
        when ``aslist`` was requested."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty entry of ``tokenlist`` under its first
        token as key, then return the results (as a plain dict when
        ``asdict`` was requested)."""
        for position, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if not entry_len:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if entry_len == 1:
                # a key with nothing after it maps to the empty string
                value = ""
            elif entry_len == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    exc = TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    )
                    raise exc from None

                # everything after the key is the value
                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` holds a pending skip, which is resolved once the
        # following expression becomes known (see __add__ / __sub__)
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        # pending skip: suppress everything up to ``other``, then match it
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        # pending skip: suppress everything up to ``other``, then match it
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # discard whatever was matched
        return []

    def suppress(self) -> ParserElement:
        # already suppressing - nothing more to wrap
        return self
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator that logs each call of a parse action to ``sys.stderr``.

    Before the wrapped parse action runs, a line of the form
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written.  After it finishes, a ``"<<leaving ..."`` line is written
    showing either the returned value or the exception that was raised
    (the exception is then re-raised unchanged).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # The last three positional arguments are always (string, location, tokens).
        s, l, t = paArgs[-3:]
        thisFunc = f.__name__
        if len(paArgs) > 3:
            # An extra leading argument is present: prefix the name with its class.
            thisFunc = paArgs[0].__class__.__name__ + "." + thisFunc
        sys.stderr.write(f">>entering {thisFunc}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(f"<<leaving {thisFunc} (exception: {exc})\n")
            raise
        else:
            sys.stderr.write(f"<<leaving {thisFunc} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z
# convenience constants for positional expressions
# Each constant is a single shared instance of the corresponding class, given
# a snake_case display name via set_name().
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")
# Grammar for the regex-style "[...]" character-set syntax parsed by srange().

# A backslash followed by one of the listed punctuation characters (or a
# space); the parse action keeps only the character after the backslash.
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# A hex escape: "\x41", "\X41", or the legacy "\0x41" form.  The hex digits are
# whatever follows the first "x"/"X".  Do not use str.lstrip(r"\0x") for this:
# lstrip removes a *set* of characters, so it would also eat leading "0" hex
# digits (turning "\x00" into "") and would never remove an upper-case "X",
# making int() fail for escapes that the regex itself accepts.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# An octal escape: backslash, "0", then octal digits; only the backslash is
# dropped before converting with base 8.
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# One character of the set: any escape form above, or exactly one character
# that is neither a backslash nor "]".
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range: grouped as [first, last] with the dash suppressed.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# Whole bracket expression: "[", optional "^" (results name "negate"), one or
# more ranges / single characters (results name "body"), then "]".
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
def srange(s: str) -> str:
    r"""Expand a regexp-style ``'[]'`` character set into a plain string,
    suitable for use when constructing a :class:`Word`::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The argument must be wrapped in ``[]``; the result is every character
    of the set concatenated into one string.  Inside the brackets you may
    use:

    - a single character
    - a backslash-escaped character (such as ``\-`` or ``\]``)
    - a hex escape introduced by ``'\x'`` (``\x21`` is ``'!'``);
      ``\0x##`` is also accepted for backwards compatibility
    - an octal escape introduced by ``'\0'`` (``\041`` is ``'!'``)
    - a dash-separated range of any of the above (``'a-z'``, etc.)
    - any mix of the above (``'aeiouy'``, ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, an empty string is returned.
    """

    def _expand(part):
        # Plain strings are single characters and are used as-is; a grouped
        # pair [first, last] is expanded to the inclusive run of characters.
        if isinstance(part, ParseResults):
            return "".join(map(chr, range(ord(part[0]), ord(part[1]) + 1)))
        return part

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(map(_expand, body))
    except Exception:
        return ""
def token_map(func, *args) -> ParseAction:
    """Build a parse action that applies ``func`` to every token of a
    :class:`ParseResults` list and returns the converted values as a list.
    Any extra ``args`` are passed to ``func`` after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed text to an integer using base 16.

    The returned parse action takes its ``__name__`` from ``func`` (or from
    the class of ``func`` when it has no ``__name__``).

    Example (compare the last one to the example in
    :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        return [func(each, *args) for each in t]

    # Name the action after the mapped callable so that it is recognisable
    # wherever the parse action's name is displayed.
    pa.__name__ = getattr(func, "__name__", func.__class__.__name__)
    return pa
def autoname_elements() -> None:
    """
    Give every still-unnamed parser element among the caller's local
    variables the name of the variable it is bound to.  Intended for
    mass-naming of parser elements, e.g. so that railroad diagrams show
    named subdiagrams.
    """
    # f_back of this function's own frame is the frame of whoever called us.
    caller = sys._getframe().f_back
    if caller is not None:
        caller = typing.cast(types.FrameType, caller)
        for var_name, candidate in caller.f_locals.items():
            # Skip non-parser objects and elements that already carry a custom name.
            if not isinstance(candidate, ParserElement) or candidate.customName:
                continue
            candidate.set_name(var_name)
# Double-quoted string.  Between the quotes the regex accepts: any character
# other than '"', newline, carriage return or backslash; a doubled '""'; or a
# backslash escape (a backslash plus any non-'x' character, or 'x' plus one or
# more hex digits).  The closing '"' is matched separately and Combine joins
# the pieces into a single token.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Single-quoted counterpart of dbl_quoted_string (same pattern with "'" as the
# delimiter and "''" as the doubled quote).
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either quoting style: the double-quoted form is listed first, then the
# single-quoted form, using the same two patterns as above.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style string literals: triple-quoted (""" or ''') or single-line.
# Unlike quoted_string, the single-line patterns here accept a backslash-escaped
# quote (\" or \') rather than a doubled quote, and the four alternatives are
# joined with ``^`` instead of ``|``.
# NOTE(review): re.MULTILINE only changes how ^ and $ anchor, and these
# patterns contain no anchors - confirm whether the flag is needed.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A quoted_string prefixed with the letter "u"; a copy of quoted_string is
# used, so the shared quoted_string object itself is not the one embedded here.
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")
# Character sets expanded by srange() from the legacy ``\0x##`` hex-escape form:
#   alphas8bit - code points 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF
#   punc8bit   - code points 0xA1-0xBF plus 0xD7 and 0xF7
# (0xD7 and 0xF7 fall between the alphas8bit ranges and are listed in punc8bit.)
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
# build list of built-in expressions, for future reference if a global default value
# gets updated
# This is a snapshot of every ParserElement instance bound to a module-level
# name at this point in the file; names defined after this statement are not
# scanned.  Keep this as a comprehension: its loop variable does not become a
# module global, so the namespace returned by vars() is not modified while it
# is being iterated.
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# backward compatibility names
# camelCase aliases: each name is bound to the very same object as its
# snake_case counterpart (no copy is made).
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end

# camelCase function names: each empty stub (body is just ``...``) is passed to
# the decorator returned by replaced_by_pep8(<snake_case function>).
# NOTE(review): presumably the decorator substitutes a wrapper that forwards to
# the snake_case function - confirm against replaced_by_pep8 in .util.
@replaced_by_pep8(null_debug_action)
def nullDebugAction(): ...

@replaced_by_pep8(trace_parse_action)
def traceParseAction(): ...

@replaced_by_pep8(condition_as_parse_action)
def conditionAsParseAction(): ...

@replaced_by_pep8(token_map)
def tokenMap(): ...
# fmt: on