Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 43%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# core.py
3#
5from collections import deque
6import os
7import typing
8from typing import (
9 Any,
10 Callable,
11 Generator,
12 List,
13 NamedTuple,
14 Sequence,
15 Set,
16 TextIO,
17 Tuple,
18 Union,
19 cast,
20)
21from abc import ABC, abstractmethod
22from enum import Enum
23import string
24import copy
25import warnings
26import re
27import sys
28from collections.abc import Iterable
29import traceback
30import types
31from operator import itemgetter
32from functools import wraps
33from threading import RLock
34from pathlib import Path
36from .util import (
37 _FifoCache,
38 _UnboundedCache,
39 __config_flags,
40 _collapse_string_to_ranges,
41 _escape_regex_range_chars,
42 _bslash,
43 _flatten,
44 LRUMemo as _LRUMemo,
45 UnboundedMemo as _UnboundedMemo,
46 replaced_by_pep8,
47)
48from .exceptions import *
49from .actions import *
50from .results import ParseResults, _ParseResultsWithOffset
51from .unicode import pyparsing_unicode
53_MAX_INT = sys.maxsize
54str_type: Tuple[type, ...] = (str, bytes)
56#
57# Copyright (c) 2003-2022 Paul T. McGuire
58#
59# Permission is hereby granted, free of charge, to any person obtaining
60# a copy of this software and associated documentation files (the
61# "Software"), to deal in the Software without restriction, including
62# without limitation the rights to use, copy, modify, merge, publish,
63# distribute, sublicense, and/or sell copies of the Software, and to
64# permit persons to whom the Software is furnished to do so, subject to
65# the following conditions:
66#
67# The above copyright notice and this permission notice shall be
68# included in all copies or substantial portions of the Software.
69#
70# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
71# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
72# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
73# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
74# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
75# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
76# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
77#
if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """
        Minimal stand-in for ``functools.cached_property`` on older Pythons.

        The wrapped function is called once per instance; its result is stored
        in the instance ``__dict__`` under the function's name, so later
        attribute lookups find the stored value directly.
        """

        def __init__(self, func):
            self._func = func

        def __get__(self, instance, owner=None):
            # accessed on the class rather than an instance: there is no
            # __dict__ to cache in, so hand back the descriptor itself
            # (same as functools.cached_property does)
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret
class __compat__(__config_flags):
    """
    Cross-version compatibility switches for pyparsing features scheduled for
    a future release. Setting a value here to True enables that feature in an
    earlier version, for compatibility development and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public name defined in the class body so far is a flag
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]
class __diag__(__config_flags):
    """
    Holder for the global diagnostic flags; every flag starts out disabled.
    See :class:`Diagnostics` for a description of each flag.
    """

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # every public name defined in the class body so far is a flag
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # flags are grouped by their name prefix
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag whose name starts with ``warn``."""
        for warning_name in cls._warning_names:
            cls.enable(warning_name)
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    Warning flags:

    - ``warn_multiple_tokens_in_named_alternation`` - warn when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more
      :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - warn when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - warn when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - warn when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - warn when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - warn when :class:`one_of` is
      incorrectly called with multiple str arguments

    Debug flags:

    - ``enable_debug_on_named_expressions`` - auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    # NOTE(review): not described in the docstring above; presumably relates to
    # using '<<' with a MatchFirst - confirm before documenting
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag.

    ``diag_enum`` is a member of :class:`Diagnostics`; the member's name
    selects the flag to enable.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag.

    ``diag_enum`` is a member of :class:`Diagnostics`; the member's name
    selects the flag to disable.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning flag
    (see :class:`Diagnostics`).
    """
    # the flag holder knows which of its flags are warnings
    __diag__.enable_all_warnings()
# hide abstract class: the base is only needed while defining __compat__ and
# __diag__ above, so remove the name from this module's namespace
del __config_flags
197def _should_enable_warnings(
198 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
199) -> bool:
200 enable = bool(warn_env_var)
201 for warn_opt in cmd_line_warn_options:
202 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split(
203 ":"
204 )[:5]
205 if not w_action.lower().startswith("i") and (
206 not (w_message or w_category or w_module) or w_module == "pyparsing"
207 ):
208 enable = True
209 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""):
210 enable = False
211 return enable
# runs at import time: turn on all diagnostic warnings when the interpreter's
# warning options (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS
# environment variable ask for them
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()
220# build list of single arg builtins, that can be used as parse actions
221_single_arg_builtins = {
222 sum,
223 len,
224 sorted,
225 reversed,
226 list,
227 tuple,
228 set,
229 any,
230 all,
231 min,
232 max,
233}
_generatorType = types.GeneratorType

# (location, tokens) pair, the shape returned by ParserElement.parseImpl
ParseImplReturnType = Tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a parse action may take 0 to 3 arguments: fn(), fn(toks), fn(loc, toks)
# or fn(s, loc, toks)
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]

# same call signatures as ParseAction, but returning a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]

# fail action: fn(s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]

# debug hooks; the trailing bool is the cache_hit flag
# start:     fn(instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# success:   fn(instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# exception: fn(instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]
# character-set constants for building expressions
nums = string.digits
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
# every printable character that is not whitespace
printables = "".join(c for c in string.printable if c not in string.whitespace)
# stack entry recorded by the first call to _trim_arity and reused by every
# later call (see the assignment inside _trim_arity)
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is always called with the full argument list and
    forwards a trailing slice of it to ``func``. The wrapper starts by passing
    all arguments; each time the call itself raises ``TypeError`` it drops one
    more leading argument (up to ``max_limit`` drops) and retries. Once a call
    succeeds the arity is considered found, and later ``TypeError`` exceptions
    are re-raised unchanged.

    Functions listed in ``_single_arg_builtins`` skip the probing and are
    called with the tokens argument only.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments currently dropped, shared across calls
    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # compare the (filename, lineno) of the last of the first two
                    # traceback entries with the synthesized location of the
                    # call to func above
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate (a function returning ``True`` or ``False``) so that it
    can be used as a parse action. Useful wherever a parse action is required
    and :class:`ParserElement.add_condition` cannot be used (such as when adding
    a condition to an operator level in :class:`infix_notation`).

    The returned parse action raises an exception when the predicate result
    is falsy, and otherwise returns nothing.

    Optional keyword arguments:

    - ``message`` - define a custom message to be used in the raised exception
    - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise will raise :class:`ParseException`

    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message

    if fatal:
        exc_type = ParseFatalException
    else:
        exc_type = ParseException

    # let the predicate accept anywhere from 0 to 3 arguments
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(instring, loc, toks):
        if not fn(instring, loc, toks):
            raise exc_type(instring, loc, msg)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Default debug hook run when a match attempt starts: prints the expression,
    the location (with line and column), the source line, and a caret under
    the column. The report is prefixed with "*" when ``cache_hit`` is true.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report = (
        f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
        f" {line(loc, instring)}\n"
        f" {' ' * (col(loc, instring) - 1)}^"
    )
    print(report)
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default debug hook run after a successful match: prints the expression
    and the matched tokens as a list. The report is prefixed with "*" when
    ``cache_hit`` is true.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report = f"{cache_hit_str}Matched {expr} -> {toks.as_list()}"
    print(report)
def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: "ParserElement",
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default debug hook run when a match attempt fails: prints the expression
    together with the type and text of the exception. The report is prefixed
    with "*" when ``cache_hit`` is true.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report = f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}"
    print(report)
def null_debug_action(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them, and returns ``None``.
    """
    return None
394class ParserElement(ABC):
395 """Abstract base level parser element class."""
397 DEFAULT_WHITE_CHARS: str = " \n\t\r"
398 verbose_stacktrace: bool = False
399 _literalStringClass: type = None # type: ignore[assignment]
401 @staticmethod
402 def set_default_whitespace_chars(chars: str) -> None:
403 r"""
404 Overrides the default whitespace chars
406 Example::
408 # default whitespace chars are space, <TAB> and newline
409 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
411 # change to just treat newline as significant
412 ParserElement.set_default_whitespace_chars(" \t")
413 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def']
414 """
415 ParserElement.DEFAULT_WHITE_CHARS = chars
417 # update whitespace all parse expressions defined in this module
418 for expr in _builtin_exprs:
419 if expr.copyDefaultWhiteChars:
420 expr.whiteChars = set(chars)
422 @staticmethod
423 def inline_literals_using(cls: type) -> None:
424 """
425 Set class to be used for inclusion of string literals into a parser.
427 Example::
429 # default literal class used is Literal
430 integer = Word(nums)
431 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
433 date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31']
436 # change to Suppress
437 ParserElement.inline_literals_using(Suppress)
438 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
440 date_str.parse_string("1999/12/31") # -> ['1999', '12', '31']
441 """
442 ParserElement._literalStringClass = cls
444 @classmethod
445 def using_each(cls, seq, **class_kwargs):
446 """
447 Yields a sequence of class(obj, **class_kwargs) for obj in seq.
449 Example::
451 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")
453 """
454 yield from (cls(obj, **class_kwargs) for obj in seq)
    class DebugActions(NamedTuple):
        # Optional hooks used by the parse methods when debugging;
        # a None entry means no hook is called for that event.
        # called when a match attempt starts
        debug_try: typing.Optional[DebugStartAction]
        # called after a successful match
        debug_match: typing.Optional[DebugSuccessAction]
        # called when a match attempt raises
        debug_fail: typing.Optional[DebugExceptionAction]
461 def __init__(self, savelist: bool = False):
462 self.parseAction: List[ParseAction] = list()
463 self.failAction: typing.Optional[ParseFailAction] = None
464 self.customName: str = None # type: ignore[assignment]
465 self._defaultName: typing.Optional[str] = None
466 self.resultsName: str = None # type: ignore[assignment]
467 self.saveAsList = savelist
468 self.skipWhitespace = True
469 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
470 self.copyDefaultWhiteChars = True
471 # used when checking for left-recursion
472 self.mayReturnEmpty = False
473 self.keepTabs = False
474 self.ignoreExprs: List["ParserElement"] = list()
475 self.debug = False
476 self.streamlined = False
477 # optimize exception handling for subclasses that don't advance parse index
478 self.mayIndexError = True
479 self.errmsg = ""
480 # mark results names as modal (report only last) or cumulative (list all)
481 self.modalResults = True
482 # custom debug actions
483 self.debugActions = self.DebugActions(None, None, None)
484 # avoid redundant calls to preParse
485 self.callPreparse = True
486 self.callDuringTry = False
487 self.suppress_warnings_: List[Diagnostics] = []
489 def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
490 """
491 Suppress warnings emitted for a particular diagnostic on this expression.
493 Example::
495 base = pp.Forward()
496 base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)
498 # statement would normally raise a warning, but is now suppressed
499 print(base.parse_string("x"))
501 """
502 self.suppress_warnings_.append(warning_type)
503 return self
505 def visit_all(self):
506 """General-purpose method to yield all expressions and sub-expressions
507 in a grammar. Typically just for internal use.
508 """
509 to_visit = deque([self])
510 seen = set()
511 while to_visit:
512 cur = to_visit.popleft()
514 # guard against looping forever through recursive grammars
515 if cur in seen:
516 continue
517 seen.add(cur)
519 to_visit.extend(cur.recurse())
520 yield cur
522 def copy(self) -> "ParserElement":
523 """
524 Make a copy of this :class:`ParserElement`. Useful for defining
525 different parse actions for the same parsing pattern, using copies of
526 the original parse element.
528 Example::
530 integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
531 integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
532 integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
534 print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))
536 prints::
538 [5120, 100, 655360, 268435456]
540 Equivalent form of ``expr.copy()`` is just ``expr()``::
542 integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
543 """
544 cpy = copy.copy(self)
545 cpy.parseAction = self.parseAction[:]
546 cpy.ignoreExprs = self.ignoreExprs[:]
547 if self.copyDefaultWhiteChars:
548 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
549 return cpy
551 def set_results_name(
552 self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
553 ) -> "ParserElement":
554 """
555 Define name for referencing matching tokens as a nested attribute
556 of the returned parse results.
558 Normally, results names are assigned as you would assign keys in a dict:
559 any existing value is overwritten by later values. If it is necessary to
560 keep all values captured for a particular results name, call ``set_results_name``
561 with ``list_all_matches`` = True.
563 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
564 this is so that the client can define a basic element, such as an
565 integer, and reference it in multiple places with different names.
567 You can also set results names using the abbreviated syntax,
568 ``expr("name")`` in place of ``expr.set_results_name("name")``
569 - see :class:`__call__`. If ``list_all_matches`` is required, use
570 ``expr("name*")``.
572 Example::
574 integer = Word(nums)
575 date_str = (integer.set_results_name("year") + '/'
576 + integer.set_results_name("month") + '/'
577 + integer.set_results_name("day"))
579 # equivalent form:
580 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
581 """
582 listAllMatches = listAllMatches or list_all_matches
583 return self._setResultsName(name, listAllMatches)
585 def _setResultsName(self, name, listAllMatches=False):
586 if name is None:
587 return self
588 newself = self.copy()
589 if name.endswith("*"):
590 name = name[:-1]
591 listAllMatches = True
592 newself.resultsName = name
593 newself.modalResults = not listAllMatches
594 return newself
596 def set_break(self, break_flag: bool = True) -> "ParserElement":
597 """
598 Method to invoke the Python pdb debugger when this element is
599 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
600 disable.
601 """
602 if break_flag:
603 _parseMethod = self._parse
605 def breaker(instring, loc, doActions=True, callPreParse=True):
606 import pdb
608 # this call to pdb.set_trace() is intentional, not a checkin error
609 pdb.set_trace()
610 return _parseMethod(instring, loc, doActions, callPreParse)
612 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined]
613 self._parse = breaker # type: ignore [assignment]
614 elif hasattr(self._parse, "_originalParseMethod"):
615 self._parse = self._parse._originalParseMethod # type: ignore [attr-defined, assignment]
616 return self
618 def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
619 """
620 Define one or more actions to perform when successfully matching parse element definition.
622 Parse actions can be called to perform data conversions, do extra validation,
623 update external data structures, or enhance or replace the parsed tokens.
624 Each parse action ``fn`` is a callable method with 0-3 arguments, called as
625 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
627 - ``s`` = the original string being parsed (see note below)
628 - ``loc`` = the location of the matching substring
629 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object
631 The parsed tokens are passed to the parse action as ParseResults. They can be
632 modified in place using list-style append, extend, and pop operations to update
633 the parsed list elements; and with dictionary-style item set and del operations
634 to add, update, or remove any named results. If the tokens are modified in place,
635 it is not necessary to return them with a return statement.
637 Parse actions can also completely replace the given tokens, with another ``ParseResults``
638 object, or with some entirely different object (common for parse actions that perform data
639 conversions). A convenient way to build a new parse result is to define the values
640 using a dict, and then create the return value using :class:`ParseResults.from_dict`.
642 If None is passed as the ``fn`` parse action, all previously added parse actions for this
643 expression are cleared.
645 Optional keyword arguments:
647 - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during
648 lookaheads and alternate testing. For parse actions that have side effects, it is
649 important to only call the parse action once it is determined that it is being
650 called as part of a successful parse. For parse actions that perform additional
651 validation, then call_during_try should be passed as True, so that the validation
652 code is included in the preliminary "try" parses.
654 Note: the default parsing behavior is to expand tabs in the input string
655 before starting the parsing process. See :class:`parse_string` for more
656 information on parsing strings containing ``<TAB>`` s, and suggested
657 methods to maintain a consistent view of the parsed string, the parse
658 location, and line and column positions within the parsed string.
660 Example::
662 # parse dates in the form YYYY/MM/DD
664 # use parse action to convert toks from str to int at parse time
665 def convert_to_int(toks):
666 return int(toks[0])
668 # use a parse action to verify that the date is a valid date
669 def is_valid_date(instring, loc, toks):
670 from datetime import date
671 year, month, day = toks[::2]
672 try:
673 date(year, month, day)
674 except ValueError:
675 raise ParseException(instring, loc, "invalid date given")
677 integer = Word(nums)
678 date_str = integer + '/' + integer + '/' + integer
680 # add parse actions
681 integer.set_parse_action(convert_to_int)
682 date_str.set_parse_action(is_valid_date)
684 # note that integer fields are now ints, not strings
685 date_str.run_tests('''
686 # successful parse - note that integer fields were converted to ints
687 1999/12/31
689 # fail - invalid date
690 1999/13/31
691 ''')
692 """
693 if list(fns) == [None]:
694 self.parseAction = []
695 return self
697 if not all(callable(fn) for fn in fns):
698 raise TypeError("parse actions must be callable")
699 self.parseAction = [_trim_arity(fn) for fn in fns]
700 self.callDuringTry = kwargs.get(
701 "call_during_try", kwargs.get("callDuringTry", False)
702 )
704 return self
706 def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
707 """
708 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.
710 See examples in :class:`copy`.
711 """
712 self.parseAction += [_trim_arity(fn) for fn in fns]
713 self.callDuringTry = self.callDuringTry or kwargs.get(
714 "call_during_try", kwargs.get("callDuringTry", False)
715 )
716 return self
718 def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
719 """Add a boolean predicate function to expression's list of parse actions. See
720 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
721 functions passed to ``add_condition`` need to return boolean success/fail of the condition.
723 Optional keyword arguments:
725 - ``message`` = define a custom message to be used in the raised exception
726 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
727 ParseException
728 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
729 default=False
731 Example::
733 integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
734 year_int = integer.copy()
735 year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
736 date_str = year_int + '/' + integer + '/' + integer
738 result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0),
739 (line:1, col:1)
740 """
741 for fn in fns:
742 self.parseAction.append(
743 condition_as_parse_action(
744 fn,
745 message=str(kwargs.get("message")),
746 fatal=bool(kwargs.get("fatal", False)),
747 )
748 )
750 self.callDuringTry = self.callDuringTry or kwargs.get(
751 "call_during_try", kwargs.get("callDuringTry", False)
752 )
753 return self
755 def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
756 """
757 Define action to perform if parsing fails at this expression.
758 Fail acton fn is a callable function that takes the arguments
759 ``fn(s, loc, expr, err)`` where:
761 - ``s`` = string being parsed
762 - ``loc`` = location where expression match was attempted and failed
763 - ``expr`` = the parse expression that failed
764 - ``err`` = the exception thrown
766 The function returns no value. It may throw :class:`ParseFatalException`
767 if it is desired to stop parsing immediately."""
768 self.failAction = fn
769 return self
771 def _skipIgnorables(self, instring: str, loc: int) -> int:
772 if not self.ignoreExprs:
773 return loc
774 exprsFound = True
775 ignore_expr_fns = [e._parse for e in self.ignoreExprs]
776 last_loc = loc
777 while exprsFound:
778 exprsFound = False
779 for ignore_fn in ignore_expr_fns:
780 try:
781 while 1:
782 loc, dummy = ignore_fn(instring, loc)
783 exprsFound = True
784 except ParseException:
785 pass
786 # check if all ignore exprs matched but didn't actually advance the parse location
787 if loc == last_loc:
788 break
789 last_loc = loc
790 return loc
792 def preParse(self, instring: str, loc: int) -> int:
793 if self.ignoreExprs:
794 loc = self._skipIgnorables(instring, loc)
796 if self.skipWhitespace:
797 instrlen = len(instring)
798 white_chars = self.whiteChars
799 while loc < instrlen and instring[loc] in white_chars:
800 loc += 1
802 return loc
804 def parseImpl(self, instring, loc, doActions=True):
805 return loc, []
807 def postParse(self, instring, loc, tokenlist):
808 return tokenlist
810 # @profile
811 def _parseNoCache(
812 self, instring, loc, doActions=True, callPreParse=True
813 ) -> Tuple[int, ParseResults]:
814 TRY, MATCH, FAIL = 0, 1, 2
815 debugging = self.debug # and doActions)
816 len_instring = len(instring)
818 if debugging or self.failAction:
819 # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
820 try:
821 if callPreParse and self.callPreparse:
822 pre_loc = self.preParse(instring, loc)
823 else:
824 pre_loc = loc
825 tokens_start = pre_loc
826 if self.debugActions.debug_try:
827 self.debugActions.debug_try(instring, tokens_start, self, False)
828 if self.mayIndexError or pre_loc >= len_instring:
829 try:
830 loc, tokens = self.parseImpl(instring, pre_loc, doActions)
831 except IndexError:
832 raise ParseException(instring, len_instring, self.errmsg, self)
833 else:
834 loc, tokens = self.parseImpl(instring, pre_loc, doActions)
835 except Exception as err:
836 # print("Exception raised:", err)
837 if self.debugActions.debug_fail:
838 self.debugActions.debug_fail(
839 instring, tokens_start, self, err, False
840 )
841 if self.failAction:
842 self.failAction(instring, tokens_start, self, err)
843 raise
844 else:
845 if callPreParse and self.callPreparse:
846 pre_loc = self.preParse(instring, loc)
847 else:
848 pre_loc = loc
849 tokens_start = pre_loc
850 if self.mayIndexError or pre_loc >= len_instring:
851 try:
852 loc, tokens = self.parseImpl(instring, pre_loc, doActions)
853 except IndexError:
854 raise ParseException(instring, len_instring, self.errmsg, self)
855 else:
856 loc, tokens = self.parseImpl(instring, pre_loc, doActions)
858 tokens = self.postParse(instring, loc, tokens)
860 ret_tokens = ParseResults(
861 tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
862 )
863 if self.parseAction and (doActions or self.callDuringTry):
864 if debugging:
865 try:
866 for fn in self.parseAction:
867 try:
868 tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type]
869 except IndexError as parse_action_exc:
870 exc = ParseException("exception raised in parse action")
871 raise exc from parse_action_exc
873 if tokens is not None and tokens is not ret_tokens:
874 ret_tokens = ParseResults(
875 tokens,
876 self.resultsName,
877 asList=self.saveAsList
878 and isinstance(tokens, (ParseResults, list)),
879 modal=self.modalResults,
880 )
881 except Exception as err:
882 # print "Exception raised in user parse action:", err
883 if self.debugActions.debug_fail:
884 self.debugActions.debug_fail(
885 instring, tokens_start, self, err, False
886 )
887 raise
888 else:
889 for fn in self.parseAction:
890 try:
891 tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type]
892 except IndexError as parse_action_exc:
893 exc = ParseException("exception raised in parse action")
894 raise exc from parse_action_exc
896 if tokens is not None and tokens is not ret_tokens:
897 ret_tokens = ParseResults(
898 tokens,
899 self.resultsName,
900 asList=self.saveAsList
901 and isinstance(tokens, (ParseResults, list)),
902 modal=self.modalResults,
903 )
904 if debugging:
905 # print("Matched", self, "->", ret_tokens.as_list())
906 if self.debugActions.debug_match:
907 self.debugActions.debug_match(
908 instring, tokens_start, loc, self, ret_tokens, False
909 )
911 return loc, ret_tokens
913 def try_parse(
914 self,
915 instring: str,
916 loc: int,
917 *,
918 raise_fatal: bool = False,
919 do_actions: bool = False,
920 ) -> int:
921 try:
922 return self._parse(instring, loc, doActions=do_actions)[0]
923 except ParseFatalException:
924 if raise_fatal:
925 raise
926 raise ParseException(instring, loc, self.errmsg, self)
928 def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
929 try:
930 self.try_parse(instring, loc, do_actions=do_actions)
931 except (ParseException, IndexError):
932 return False
933 else:
934 return True
    # cache for left-recursion in Forward references
    # (class-level, so shared by every parser element)
    recursion_lock = RLock()
    # keys are (int, Forward, bool) tuples; values pair an int with either the
    # ParseResults of a match or the Exception that was raised
    recursion_memos: typing.Dict[
        Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
    ] = {}
    class _CacheType(dict):
        """
        class to help type checking
        """

        # marker that _parseCache compares (by identity) with the result of
        # get() to detect a cache miss; only declared here, not assigned
        not_in_cache: bool

        # placeholder bodies: this class only describes the get/set interface
        def get(self, *args): ...

        def set(self, *args): ...
    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    packrat_cache = (
        _CacheType()
    )  # set later by enable_packrat(); this is here so that reset_cache() doesn't fail
    # guards cache access in _parseCache
    packrat_cache_lock = RLock()
    # [hits, misses], indexed by the HIT and MISS constants in _parseCache
    packrat_cache_stats = [0, 0]
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Memoizing front end for ``_parseNoCache``.

    Results are keyed on ``(self, instring, loc, callPreParse, doActions)``.
    On a miss the expression is parsed; a successful result is cached as
    ``(end_loc, tokens_copy, start_loc)`` and a ``ParseBaseException`` is
    cached as a fresh instance built from its ``args`` (so no traceback is
    retained) before being re-raised.

    On a hit the cached exception is raised, or ``(end_loc, tokens_copy)`` is
    returned. If debugging is enabled, the debug actions are called with
    ``cache_hit=True``; a ``TypeError`` from a debug action (e.g. one that
    does not accept ``cache_hit``) is ignored.

    The whole lookup, including the parse on a miss, runs while holding
    ``ParserElement.packrat_cache_lock``.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store a copy of the tokens so later mutation by the caller
                # cannot corrupt the cached entry
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value

        ParserElement.packrat_cache_stats[HIT] += 1
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(value, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise value

        value = cast(Tuple[int, ParseResults, int], value)
        # cached layout is (end location, tokens, start location)
        end_loc, result, start_loc = value[0], value[1].copy(), value[2]
        if self.debug and self.debugActions.debug_match:
            try:
                # success actions take (start, end), the same order used by
                # the uncached parse path
                self.debugActions.debug_match(
                    instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return end_loc, result
# default parse entry point: no memoization. enable_packrat() rebinds this to
# _parseCache, and disable_memoization() restores _parseNoCache.
_parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero every packrat hit/miss counter in place,
    and empty the left-recursion memo table.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so that existing references to the stats list stay valid
    stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()
# memoization mode flags; set by enable_packrat() / enable_left_recursion()
# and cleared by disable_memoization()
_packratEnabled = False
_left_recursion_enabled = False
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing and discard their memoized data.

    Calling this when neither mode is active is harmless, so it can be used
    before activating Packrat or Left Recursion to clear any previous settings.
    """
    ParserElement.reset_cache()
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False
    # route parsing back through the non-caching implementation
    ParserElement._parse = ParserElement._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Enables "bounded recursion" parsing, which allows for both direct and indirect
    left-recursion. During parsing, left-recursive :class:`Forward` elements are
    repeatedly matched with a fixed recursion depth that is gradually increased
    until finding the longest match.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    Recursion search naturally memoizes matches of ``Forward`` elements and may
    thus skip reevaluation of parse actions during backtracking. This may break
    programs with parse actions which rely on strict ordering of side-effects.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements. Values that are not greater than
      zero raise ``NotImplementedError``.
    - ``force`` - (default=``False``) - if true, call
      ``disable_memoization()`` first instead of raising ``RuntimeError``
      when Packrat parsing is already enabled.

    Bounded Recursion parsing works similar but not identical to Packrat parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError(f"Memo size of {cache_size_limit}")

    ParserElement.recursion_memos = memo  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code. Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.
    - ``force`` - (default= ``False``) - if true, call
      ``disable_memoization()`` first instead of raising ``RuntimeError``
      when Bounded Recursion parsing is already enabled.

    If packrat parsing is already enabled (and ``force`` is not given), the
    call returns without changing the existing cache.

    This speedup may break existing programs that use parse actions that
    have side-effects. For this reason, packrat parsing is disabled when
    you first import pyparsing. To activate the packrat feature, your
    program must call the class method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similar but not identical to Bounded Recursion parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    ParserElement.packrat_cache = (
        _UnboundedCache()
        if cache_size_limit is None
        else _FifoCache(cache_size_limit)  # type: ignore[assignment]
    )
    # from now on every parse goes through the memoizing wrapper
    ParserElement._parse = ParserElement._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This is the primary
    interface for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    Setting ``parse_all`` is equivalent to ending the grammar with ``StringEnd()``.

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_match_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if must_match_all:
            # skip trailing ignorables, then require end of input
            loc = self.preParse(instring, loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    return tokens
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.
    ``maxMatches`` is the pre-PEP8 spelling of ``max_matches``; the smaller of
    the two values is used. If ``debug`` is true, each match is also printed
    as a dict with ``tokens``, ``start`` and ``end`` keys.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    preparseFn = self.preParse
    parseFn = self._parse
    # PEP8 name, as used by parse_string()
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # distinct name so it is not confused with nextLoc,
                        # the end of the match just reported
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match consumed nothing past loc; force progress
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string`, to modify matching text with modified tokens that may
    be returned from a parse action. To use ``transform_string``, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking ``transform_string()`` on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action. ``transform_string()`` returns the resulting transformed string.

    Note that this method sets ``keepTabs`` to ``True`` on this expression.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # text between the previous match and this one is copied as-is
            pieces.append(instring[prev_end:start])
            prev_end = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces += toks.as_list()
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join([str(s) for s in _flatten(non_empty)])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, simplifying the access to the tokens found
    to match the given parse expression. May be called with optional
    ``max_matches`` argument, to clip searching after 'n' matches are found.
    ``maxMatches`` is the pre-PEP8 spelling; the smaller of the two values is used.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            toks for toks, _start, _end in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``include_separators`` argument (default= ``False``), if the separating
    matching text should be included in the split results. ``includeSeparators`` is
    the pre-PEP8 spelling; separators are included if either flag is true.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    with_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if with_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator
    yield instring[cursor:]
def __add__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator - returns :class:`And`. A string operand
    is converted using ``_literalStringClass`` (:class:`Literal` by default).
    Operands that are neither strings nor :class:`ParserElement` yield
    ``NotImplemented``.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented
def __radd__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` becomes ``SkipTo(expr)("_skipped*") + expr``; a string is
    converted with ``_literalStringClass``; any other non-parser operand
    yields ``NotImplemented``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented
def __sub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    The result is ``self + And._ErrorStop() + other``. A string operand is
    converted with ``_literalStringClass``; any other non-parser operand
    yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented
def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented
def __mul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` is accepted wherever ``None`` is.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``ValueError`` for a negative count or when the second tuple
    value is smaller than the first; returns ``NotImplemented`` for
    unsupported operand types.
    """
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        required, optional = other, 0
    else:
        # normalise to a 2-tuple, treating ``...`` as ``None``
        bounds = tuple(None if item is Ellipsis else item for item in other)
        bounds = (bounds + (None, None))[:2]
        if bounds[0] is None:
            bounds = (0, bounds[1])
        low, high = bounds

        if isinstance(low, int) and high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)

        if not (isinstance(low, int) and isinstance(high, int)):
            return NotImplemented
        required, optional = low, high - low

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    if not optional:
        return self if required == 1 else And([self] * required)

    # build Opt(self + Opt(self + ... Opt(self))) nested ``optional`` deep
    trailing = Opt(self)
    for _ in range(optional - 1):
        trailing = Opt(self + trailing)

    if not required:
        return trailing
    if required == 1:
        return self + trailing
    return And([self] * required) + trailing
def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator when the left operand is not a
    :class:`ParserElement`; delegates to ``__mul__`` with the same operand.
    """
    return self.__mul__(other)
def __or__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a ``_PendingSkip`` created with ``must_skip=True``.
    A string operand is converted with ``_literalStringClass``, except that
    the empty string makes the expression optional (``Opt(expr)``). Any
    other non-parser operand yields ``NotImplemented``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented
def __ror__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented
def __xor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator - returns :class:`Or`.

    A string operand is converted with ``_literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented
def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented
def __and__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator - returns :class:`Each`.

    A string operand is converted with ``_literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented
def __rand__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented
def __invert__(self) -> "ParserElement":
    """
    Implementation of ``~`` operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated
# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence (without this, Python would fall back to calling
# __getitem__ with 0, 1, 2, ... when asked to iterate over an expression)
__iter__ = None
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr`` instances exist in the input stream. If this
    behavior is desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` if more than two index arguments are given.
    """
    has_stop = False
    stop_expr = NoMatch()
    if isinstance(key, slice):
        # expr[start : stop_on]
        has_stop = True
        stop_expr = key.stop
        key = ... if key.start is None else key.start
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max : stop_on]
        has_stop = True
        tail = key[1]
        key, stop_expr = (key[0], tail.start), tail.stop

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop:
        repeated.stopOn(stop_expr)

    return repeated
def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)
def suppress(self) -> "ParserElement":
    """
    Return this expression wrapped in :class:`Suppress`, so that its matched
    tokens are left out of the results; useful to keep punctuation from
    cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed
def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on skipping of whitespace before this :class:`ParserElement`'s
    pattern is matched. Returns ``self``.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any).
      This implementation does not itself use the argument.
    """
    self.skipWhitespace = True
    return self
def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off skipping of whitespace before this :class:`ParserElement`'s
    pattern is matched. This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.
    Returns ``self``.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any).
      This implementation does not itself use the argument.
    """
    self.skipWhitespace = False
    return self
def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Overrides the default whitespace chars.

    Stores ``chars`` as a set in ``whiteChars``, records ``copy_defaults`` in
    ``copyDefaultWhiteChars``, enables whitespace skipping, and returns ``self``.
    """
    self.whiteChars = set(chars)
    self.copyDefaultWhiteChars = copy_defaults
    self.skipWhitespace = True
    return self
def parse_with_tabs(self) -> "ParserElement":
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before
    parsing the input string, by setting ``keepTabs``. Must be called before
    ``parse_string`` when the input grammar contains elements that match
    ``<TAB>`` characters. Returns ``self``.
    """
    self.keepTabs = True
    return self
def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns. Returns ``self``.

    A string argument is wrapped in :class:`Suppress`. A :class:`Suppress`
    expression is added only if it is not already in the ignore list; any
    other expression is copied, wrapped in :class:`Suppress`, and added.

    Example::

        patt = Word(alphas)[...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching, and
    turn debugging on for this expression. A falsy action is replaced by the
    corresponding default debug action. Returns ``self``.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_failure = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_failure)
    self.debug = True
    return self
def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to apply the setting to every expression
    returned by ``visit_all()``. Returns ``self``.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if recurse:
        # each visited expression applies the flag to itself only
        for expr in self.visit_all():
            expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self
@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    The name is produced by ``_generateDefaultName()`` the first time it is
    needed and cached in ``_defaultName`` for later accesses.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The ``default_name`` property calls this method when no generated name is
    cached in ``_defaultName``, and stores the returned string there.
    """
def set_name(self, name: str) -> "ParserElement":
    """
    Assign a name to this expression, so that debugging and exception messages are clearer.

    The name is stored as the custom name and the expression's error message is
    rebuilt from it. Returns this expression, so calls can be chained.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # self.name is read after customName is stored, so it reflects the new name
    self.errmsg = f"Expected {self.name}"
    debug_named_exprs = __diag__.enable_debug_on_named_expressions
    if debug_named_exprs:
        self.set_debug()
    return self
@property
def name(self) -> str:
    """
    Name of this expression: the user-defined name if one has been set,
    otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom
def __str__(self) -> str:
    """Return the expression's ``name`` as its string form."""
    display_name = self.name
    return display_name
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text
def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard any cached default name,
    so that it is regenerated on next use. Returns this expression.
    """
    self._defaultName = None
    self.streamlined = True
    return self
def recurse(self) -> List["ParserElement"]:
    """Return the sub-expressions of this element; this implementation has none."""
    sub_expressions: List["ParserElement"] = []
    return sub_expressions
def _checkRecursion(self, parseElementList):
    """
    Run the recursion check on every sub-expression returned by ``recurse()``,
    passing each one a new list made of ``parseElementList`` followed by this
    expression. The caller's list is not modified.
    """
    trail = parseElementList + [self]
    for sub_expr in self.recurse():
        sub_expr._checkRecursion(trail)
def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: a ``DeprecationWarning`` is issued on every call. ``validateTrace``
    is accepted but not used.
    """
    deprecation_message = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression against the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, or a
    filename. When a filename is given, the file is opened with ``encoding``,
    read in full, and closed before parsing starts.

    ``parse_all`` (or its synonym ``parseAll``) is forwarded to ``parse_string``.
    """
    require_full_parse = parseAll or parse_all
    try:
        # try the argument as a file-like object first
        stream = typing.cast(TextIO, file_or_filename)
        file_contents = stream.read()
    except AttributeError:
        # no usable read(): treat the argument as something open() accepts
        filename = typing.cast(str, file_or_filename)
        with open(filename, "r", encoding=encoding) as f:
            file_contents = f.read()
    try:
        return self.parse_string(file_contents, require_full_parse)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object always compares equal
    - a string compares equal if this expression matches it in full
    - another ``ParserElement`` compares equal if both have equal instance attributes
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity
def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of a parser against a test string, returning ``True`` or ``False``
    instead of results or an exception. Handy for simple inline microtests of
    sub-expressions while building up a larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # both spellings default to True, so either one set to False turns the check off
    require_full_parse = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_full_parse)
    except ParseBaseException:
        return False
    else:
        return True
def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` tuples, one per test, where ``result`` is the
    :class:`ParseResults` of a successful parse or the exception that was raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the PEP8 arguments with their pre-PEP8 synonyms
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a single multiline string: one stripped test per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    comments: List[str] = []
    success = True
    # matches the two-character sequence backslash + 'n' outside of quoted strings
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines, and blank lines following a comment, are collected
        # and emitted ahead of the next test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # callback produced nothing: fall back to the normal dump,
                        # honoring full_dump like the other dump calls
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # not every callable has __name__ (e.g. functools.partial)
                    post_parse_name = getattr(
                        postParse, "__name__", type(postParse).__name__
                    )
                    out.append(
                        f"{post_parse_name} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        # trailing empty entry so the joined output ends with a newline
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )
    # render before touching the output target, so a rendering failure
    # cannot leave behind an empty or truncated file
    html = railroad_to_html(railroad, embed=embed, **kwargs)

    if isinstance(output_html, (str, Path)):
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(html)
    else:
        # we were passed a file-like object, just write to it
        output_html.write(html)
# Compatibility synonyms
# Each camelCase name below is bound to its snake_case counterpart, either
# through replaced_by_pep8() or, for the last three, by direct assignment.
# fmt: off
inlineLiteralsUsing = replaced_by_pep8("inlineLiteralsUsing", inline_literals_using)
setDefaultWhitespaceChars = replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
)
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
enableLeftRecursion = replaced_by_pep8("enableLeftRecursion", enable_left_recursion)
enablePackrat = replaced_by_pep8("enablePackrat", enable_packrat)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
# plain aliases: the same object under the older name
canParseNext = can_parse_next
resetCache = reset_cache
defaultName = default_name
# fmt: on
class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render the anchor followed by an Empty, then show the Empty as '...'
        anchor_then_empty = self.anchor + Empty()
        return str(anchor_then_empty).replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        # adding the next expression supplies the SkipTo target
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def drop_empty_skip(t):
            # anchor matched: discard a skipped section that is absent or empty
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def report_missing_anchor(t):
            # anchor did not match: record that in place of an empty skip
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(drop_empty_skip)
        without_anchor = skipper().add_parse_action(report_missing_anchor)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a placeholder that was never completed by `+ <expr>` cannot parse
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # by default a token is named after its class
        cls = type(self)
        return cls.__name__
class NoMatch(Token):
    """
    A token that never matches; parsing it always raises :class:`ParseException`.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Subclasses are always created as themselves
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        text = matchString or match_string
        if not text:
            return super().__new__(Empty)
        if len(text) == 1:
            return super().__new__(_SingleCharLiteral)

        # Default behavior
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # single-character comparison first, then the full startswith test
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class Empty(Literal):
    """
    A token that always matches, consuming no input and returning no tokens.
    """

    def __init__(self, match_string="", *, matchString=""):
        # both arguments are accepted but ignored; the literal is always ""
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, doActions=True):
        no_tokens = []
        return loc, no_tokens
class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl compares a single character only
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Register Literal on ParserElement as the literal-string class; done here
# because Literal is only defined at this point in the module.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Raises ``ValueError`` if the keyword string is empty.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            # "from None": the internal IndexError is an implementation detail
            raise ValueError(
                "null string passed to Keyword; use Empty() instead"
            ) from None
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case on both sides
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # errmsg/errloc are refined below when the text matched but a word
        # boundary was violated
        errmsg = self.errmsg
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # pre-PEP8 synonym
    setDefaultKeywordChars = set_default_keyword_chars
class CaselessLiteral(Literal):
    """
    Token that matches a specified string without regard to letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        defining_text = matchString or match_string
        # the stored match text is upper-cased for comparison
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
class CaselessKeyword(Keyword):
    """
    Case-insensitive form of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # resolve the pre-PEP8 synonyms, then defer to Keyword with caseless on
        keyword_chars = identChars or ident_chars
        keyword_text = matchString or match_string
        super().__init__(keyword_text, keyword_chars, caseless=True)
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # the PEP8 argument wins when given; otherwise use the synonym (default 1)
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, doActions=True):
        start = loc
        maxloc = start + len(self.match_string)

        # only attempt a comparison if enough input remains
        if maxloc <= len(instring):
            match_string = self.match_string
            mismatches = []
            maxMismatches = self.maxMismatches
            caseless = self.caseless

            for position, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(position)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # the whole pattern was compared within the mismatch budget, so
                # exactly len(match_string) characters are consumed; using maxloc
                # (rather than the last loop index + 1) is also correct when
                # match_string is empty and the loop body never ran
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2701class Word(Token):
2702 """Token for matching words composed of allowed character sets.
2704 Parameters:
2706 - ``init_chars`` - string of all characters that should be used to
2707 match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
2708 if ``body_chars`` is also specified, then this is the string of
2709 initial characters
2710 - ``body_chars`` - string of characters that
2711 can be used for matching after a matched initial character as
2712 given in ``init_chars``; if omitted, same as the initial characters
2713 (default=``None``)
2714 - ``min`` - minimum number of characters to match (default=1)
2715 - ``max`` - maximum number of characters to match (default=0)
2716 - ``exact`` - exact number of characters to match (default=0)
2717 - ``as_keyword`` - match as a keyword (default=``False``)
2718 - ``exclude_chars`` - characters that might be
2719 found in the input ``body_chars`` string but which should not be
2720 accepted for matching ;useful to define a word of all
2721 printables except for one or two characters, for instance
2722 (default=``None``)
2724 :class:`srange` is useful for defining custom character set strings
2725 for defining :class:`Word` expressions, using range notation from
2726 regular expression character sets.
2728 A common mistake is to use :class:`Word` to match a specific literal
2729 string, as in ``Word("Address")``. Remember that :class:`Word`
2730 uses the string argument to define *sets* of matchable characters.
2731 This expression would match "Add", "AAA", "dAred", or any other word
2732 made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
2733 exact literal string, use :class:`Literal` or :class:`Keyword`.
2735 pyparsing includes helper strings for building Words:
2737 - :class:`alphas`
2738 - :class:`nums`
2739 - :class:`alphanums`
2740 - :class:`hexnums`
2741 - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
2742 - accented, tilded, umlauted, etc.)
2743 - :class:`punc8bit` (non-alphabetic characters in ASCII range
2744 128-255 - currency, symbols, superscripts, diacriticals, etc.)
2745 - :class:`printables` (any non-whitespace character)
2747 ``alphas``, ``nums``, and ``printables`` are also defined in several
2748 Unicode sets - see :class:`pyparsing_unicode``.
2750 Example::
2752 # a word composed of digits
2753 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
2755 # a word with a leading capital, and zero or more lowercase
2756 capitalized_word = Word(alphas.upper(), alphas.lower())
2758 # hostnames are alphanumeric, with leading alpha, and '-'
2759 hostname = Word(alphas, alphanums + '-')
2761 # roman numeral (not a strict parser, accepts invalid mix of characters)
2762 roman = Word("IVXLCDM")
2764 # any string of non-whitespace characters, except for ','
2765 csv_value = Word(printables, exclude_chars=",")
2766 """
def __init__(
    self,
    init_chars: str = "",
    body_chars: typing.Optional[str] = None,
    min: int = 1,
    max: int = 0,
    exact: int = 0,
    as_keyword: bool = False,
    exclude_chars: typing.Optional[str] = None,
    *,
    initChars: typing.Optional[str] = None,
    bodyChars: typing.Optional[str] = None,
    asKeyword: bool = False,
    excludeChars: typing.Optional[str] = None,
):
    """Configure the character sets and length limits of this word matcher.

    Parameters:

    - ``init_chars`` - characters allowed as the first character; must not
      be empty
    - ``body_chars`` - characters allowed after the first character; when
      omitted (or empty after exclusion) the initial characters are used
    - ``min`` - minimum length, must be >= 1
    - ``max`` - maximum length; 0 means no maximum was specified
    - ``exact`` - when > 0, overrides both ``min`` and ``max``
    - ``as_keyword`` - when true, the match must not be adjacent to other
      body characters
    - ``exclude_chars`` - characters removed from both character sets

    The camelCase keyword-only arguments are synonyms for the snake_case
    ones.

    Raises ``ValueError`` if the initial characters are empty, if
    ``min < 1``, or if ``max`` is specified and ``min > max``.
    """
    # A truthy camelCase argument takes precedence over its snake_case twin.
    initChars = initChars or init_chars
    bodyChars = bodyChars or body_chars
    asKeyword = asKeyword or as_keyword
    excludeChars = excludeChars or exclude_chars
    super().__init__()
    if not initChars:
        raise ValueError(
            f"invalid {type(self).__name__}, initChars cannot be empty string"
        )

    initChars_set = set(initChars)
    if excludeChars:
        # strip the excluded characters from both the leading and body sets
        excludeChars_set = set(excludeChars)
        initChars_set -= excludeChars_set
        if bodyChars:
            bodyChars = "".join(set(bodyChars) - excludeChars_set)
    self.initChars = initChars_set
    self.initCharsOrig = "".join(sorted(initChars_set))

    if bodyChars:
        self.bodyChars = set(bodyChars)
        self.bodyCharsOrig = "".join(sorted(bodyChars))
    else:
        # no distinct body characters: the body shares the leading set
        self.bodyChars = initChars_set
        self.bodyCharsOrig = self.initCharsOrig

    # remembered so parseImpl can reject words that run past an explicit max
    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError(
            "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
        )

    if self.maxSpecified and min > max:
        raise ValueError(
            f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
        )

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        # exact overrides min/max, including the locals used to build the
        # regex below
        min = max = exact
        self.maxLen = exact
        self.minLen = exact

    self.errmsg = f"Expected {self.name}"
    self.mayIndexError = False
    self.asKeyword = asKeyword
    if self.asKeyword:
        self.errmsg += " as a keyword"

    # see if we can make a regex for this Word
    # (only attempted when neither character set contains a space)
    if " " not in (self.initChars | self.bodyChars):
        if len(self.initChars) == 1:
            re_leading_fragment = re.escape(self.initCharsOrig)
        else:
            re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

        if self.bodyChars == self.initChars:
            # a single character class, repeated for the whole word
            if max == 0 and self.minLen == 1:
                repeat = "+"
            elif max == 1:
                repeat = ""
            else:
                if self.minLen != self.maxLen:
                    repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                else:
                    repeat = f"{{{self.minLen}}}"
            self.reString = f"{re_leading_fragment}{repeat}"
        else:
            # leading fragment matches one character, so the body repeat
            # counts are one less than the word length limits
            if max == 1:
                re_body_fragment = ""
                repeat = ""
            else:
                re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                if max == 0 and self.minLen == 1:
                    repeat = "*"
                elif max == 2:
                    repeat = "?" if min <= 1 else ""
                else:
                    if min != max:
                        repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                    else:
                        repeat = f"{{{min - 1 if min > 0 else ''}}}"

            self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

        if self.asKeyword:
            self.reString = rf"\b{self.reString}\b"

        try:
            self.re = re.compile(self.reString)
        except re.error:
            # regex could not be compiled; keep the character-scanning parseImpl
            self.re = None  # type: ignore[assignment]
        else:
            # regex compiled: route parsing through the regex implementation
            self.re_match = self.re.match
            self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]
def _generateDefaultName(self) -> str:
    """Build the default display name from the character sets and lengths.

    The name has the form ``W:(init)`` or ``W:(init, body)``, with each
    character set collapsed to ranges and abbreviated to 16 characters,
    followed by a ``{min,max}`` style suffix when a length restriction
    applies.  A word of exactly one character drops the ``W:`` prefix.
    """
    limit = 16

    def abbreviate(chars):
        text = _collapse_string_to_ranges(chars, re_escape=False)
        return text if len(text) <= limit else text[: limit - 3] + "..."

    lead = abbreviate(self.initChars)
    if self.initChars == self.bodyChars:
        label = f"W:({lead})"
    else:
        label = f"W:({lead}, {abbreviate(self.bodyChars)})"

    shortest, longest = self.minLen, self.maxLen

    # no length restriction: the bare label is the name
    if shortest <= 1 and longest == _MAX_INT:
        return label

    # add length specification
    if shortest == longest:
        return label[2:] if shortest == 1 else label + f"{{{shortest}}}"
    if longest == _MAX_INT:
        return label + f"{{{shortest},...}}"
    return label + f"{{{shortest},{longest}}}"
def parseImpl(self, instring, loc, doActions=True):
    """Match a word by scanning characters one at a time.

    Returns ``(end_loc, matched_text)``.  Raises ``ParseException`` when
    the first character is not an initial character, the word is shorter
    than ``minLen``, an explicit maximum was given and more body characters
    follow, or keyword mode is on and the word is adjacent to another body
    character.
    """
    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    total = len(instring)
    body = self.bodyChars
    # never scan beyond the maximum word length or the end of the input
    limit = min(begin + self.maxLen, total)

    loc = begin + 1
    while loc < limit and instring[loc] in body:
        loc += 1

    rejected = (
        # too short
        loc - begin < self.minLen
        # explicit max reached but the word keeps going
        or (self.maxSpecified and loc < total and instring[loc] in body)
        # keyword must not touch body characters on either side
        or (
            self.asKeyword
            and (
                (begin > 0 and instring[begin - 1] in body)
                or (loc < total and instring[loc] in body)
            )
        )
    )
    if rejected:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
def parseImpl_regex(self, instring, loc, doActions=True):
    """Match a word using the precompiled regex's ``match`` at ``loc``.

    Returns ``(end_loc, matched_text)``; raises ``ParseException`` when the
    regex does not match.
    """
    found = self.re_match(instring, loc)
    if found:
        return found.end(), found.group()
    raise ParseException(instring, loc, self.errmsg, self)
class Char(Word):
    """Matches exactly one character drawn from a given string of
    characters; a short-cut for :class:`Word` ``(characters, exact=1)``.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # camelCase synonyms take precedence when truthy
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset,
            exact=1,
            as_keyword=keyword_mode,
            exclude_chars=excluded,
        )
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ documentation for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may instead be an already-compiled object exposing
        ``pattern`` and ``match`` attributes, in which case it is used
        directly.  ``as_group_list`` makes the parse result the tuple of
        regex groups; ``as_match`` makes it the raw match object (and takes
        precedence when both are set).

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        for any other unsupported ``pattern`` type.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred to the ``re`` property
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled pattern (stdlib ``re`` or a compatible module)
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]

    @cached_property
    def re(self):
        """The compiled pattern: the one supplied at construction, or the
        result of compiling ``pattern`` with ``flags`` on first access.

        Raises ``ValueError`` (chained to the underlying ``re.error``) if
        the pattern string cannot be compiled.
        """
        if self._re:
            return self._re

        try:
            return re.compile(self.pattern, self.flags)
        except re.error as err:
            # keep the original regex error as the explicit cause
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """True if the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

    def parseImpl(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and a
        :class:`ParseResults` holding the matched text, with every named
        group also stored as a named result.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and the tuple of
        regex groups.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and the match object
        itself.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or when
        ``repl`` is callable and ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:
            # the token is a match object: expand the template against it

            def pa(tokens):
                return tokens[0].expand(repl)

        else:
            # the token is the matched text: re-run the substitution on it

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # maps the two-character escape text (backslash + letter) to the
    # whitespace character it stands for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """Build the matching regex (and, when unquoting, the unquote
        scanner) from the quoting parameters described on the class.

        The camelCase keyword-only arguments are synonyms for the
        snake_case ones.  Raises ``ValueError`` if the opening or closing
        quote is empty after stripping whitespace, or if the generated
        pattern fails to compile.
        """
        super().__init__()
        # merge synonyms: value arguments prefer a truthy camelCase value;
        # the boolean flags default to True, so both spellings must be true
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        # (alternatives are tried in the order they are appended)
        inner_pattern = []

        if esc_quote:
            # the escaped-quote sequence, taken as a unit
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            # the escape character followed by any one character
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # multi-character end quote: allow any proper prefix of it that
            # is not followed by the remainder of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        # any single character other than the first end-quote character and
        # the escape character; newline and CR are also excluded unless
        # multiline is set
        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
            )
        else:
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
            )

        # opening quote, zero or more inner alternatives, closing quote
        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            # scanner used by parseImpl to walk the unquoted body; the group
            # numbering here must stay in step with parseImpl
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        """Describe the quoting: a single delimiter when the opening and
        closing quotes are the same string, otherwise both delimiters.
        """
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(end_loc, text)`` where ``text`` is the full matched
        string, or - when ``unquote_results`` is set - the body with the
        delimiters stripped and escapes resolved.  Raises
        ``ParseException`` if there is no match.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        # (evaluates to the match object only when both tests succeed)
        result = (
            instring[loc] == self.first_quote_char
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped characters
                        else match.group(2)[-1] if match.group(2)
                        # match group 3 matches any character
                        else match.group(3)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set. Whitespace is included in the matched characters unless it is
    listed in the exclusion set (see example). Defined with a string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. ``min`` defaults to 1 (a minimum
    value < 1 is not valid); ``max`` and ``exact`` default to 0, meaning
    no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the collapsed form only decides whether to abbreviate; the name
        # itself shows the characters as given
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            shown = self.notChars
        else:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, doActions=True):
        forbidden = self.notCharsSet
        if instring[loc] in forbidden:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # stop at the maximum length or the end of the input
        stop = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < stop and instring[loc] not in forbidden:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class White(Token):
    """Special matching class for matching whitespace. Whitespace is
    normally ignored by pyparsing grammars; use this class when some
    whitespace structures are significant. Define with a string
    containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when building the default name of the expression
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # skip only the known whitespace characters this expression does
        # not itself match
        skippable = "".join(ch for ch in self.whiteStrs if ch not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        self.mayReturnEmpty = True
        self.errmsg = f"Expected {self.name}"

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        labels = White.whiteStrs
        return "".join([labels[ch] for ch in self.matchWhite])

    def parseImpl(self, instring, loc, doActions=True):
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # stop at the maximum length or the end of the input
        stop = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class PositionToken(Token):
    """Base class for the position-testing tokens defined after it
    (:class:`GoToColumn`, :class:`LineStart`, :class:`StringEnd`, ...).
    Instances are marked as able to return empty and as not raising
    index errors.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and whitespace until the target
        column is reached, a non-space character is found, or the input
        ends.
        """
        if col(loc, instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        end = len(instring)
        while loc < end:
            if not instring[loc].isspace() or col(loc, instring) == self.col:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column, or raise
        ``ParseException`` if ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]
class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # keep a copy of the whitespace set before newline is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newline
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        # when newline was originally whitespace, step over each newline
        # and any skippable whitespace that follows it
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, doActions=True):
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # a newline must not be skipped as leading whitespace
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        end = len(instring)

        # end of input counts as end of line, with no token text
        if loc == end:
            return loc + 1, []

        # otherwise only a newline at the current position matches
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"

        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # position 0 always matches; otherwise everything before loc must be
        # just whitespace and ignorables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)
class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        end = len(instring)

        # exactly at the end: step one past it
        if loc == end:
            return loc + 1, []

        # already past the end: stay put
        if loc > end:
            return loc, []

        # still inside the text
        raise ParseException(instring, loc, self.errmsg, self)
class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase synonym wins only when it was changed from the default
        chosen = wordChars if wordChars != printables else word_chars
        super().__init__()
        self.wordChars = set(chosen)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        if loc == 0:
            return loc, []

        chars = self.wordChars
        # previous character must not be a word character, current one must be
        if instring[loc - 1] not in chars and instring[loc] in chars:
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)
class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase synonym wins only when it was changed from the default
        chosen = wordChars if wordChars != printables else word_chars
        super().__init__()
        self.wordChars = set(chosen)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        size = len(instring)
        # empty input, or a position at/after the end, always matches
        if size == 0 or loc >= size:
            return loc, []

        chars = self.wordChars
        # current character must not be a word character, previous one must be
        if instring[loc] not in chars and instring[loc - 1] in chars:
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(savelist)
        self.exprs: List[ParserElement]

        # materialize generators so they can be inspected more than once
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # any strings in the sequence are wrapped with the literal class
            members = list(exprs)
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in members
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]

        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        self.exprs.append(other)
        # cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so shared sub-expressions are not modified
            self.exprs = [sub.copy() for sub in self.exprs]
            for sub in self.exprs:
                sub.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)

        if recursive:
            # work on copies so shared sub-expressions are not modified
            self.exprs = [sub.copy() for sub in self.exprs]
            for sub in self.exprs:
                sub.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        # a Suppress that is already being ignored is not registered again
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            for sub in self.exprs:
                sub.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()

        for sub in self.exprs:
            sub.streamline()

        def can_absorb(candidate):
            # same class, with nothing attached that flattening would lose
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        def inherit_flags(candidate):
            self._defaultName = None
            self.mayReturnEmpty |= candidate.mayReturnEmpty
            self.mayIndexError |= candidate.mayIndexError

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if can_absorb(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                inherit_flags(first)

            last = self.exprs[-1]
            if can_absorb(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                inherit_flags(last)

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        seen = [] if validateTrace is None else validateTrace
        trace = seen[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        duplicate = typing.cast(ParseExpression, super().copy())
        # contained expressions are copied too, not shared with the original
        duplicate.exprs = [sub.copy() for sub in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        # only scan the contained expressions when the diagnostic is enabled
        # and not suppressed on this expression
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for sub in self.exprs:
                if not (isinstance(sub, ParserElement) and sub.resultsName):
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in sub.suppress_warnings_
                ):
                    continue
                # warn once, for the first named sub-expression found
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {sub.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                break

        return super()._setResultsName(name, listAllMatches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once ``And.parseImpl`` passes one, later failures
        are raised as :class:`ParseSyntaxException`. Its default name is ``"-"``.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence from ``exprs_arg``.

        Every ``...`` (Ellipsis) entry that is followed by another expression
        is replaced by ``SkipTo(<next expression>)("_skipped*")``.

        :param exprs_arg: the expressions to match, in order
        :param savelist: passed through to the superclass constructor
        :raises Exception: if the sequence ends in ``...``
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is not Ellipsis:
                    tmp.append(expr)
                    continue

                if i < len(exprs) - 1:
                    # build a throwaway And and take its last sub-expression as the
                    # SkipTo target (presumably so that a plain string following
                    # the ellipsis is converted to a parser element - confirm)
                    skipto_arg: ParserElement = typing.cast(
                        ParseExpression, (Empty() + exprs[i + 1])
                    ).exprs[-1]
                    tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    continue

                raise Exception("cannot construct And with sequence ending in ...")
            exprs[:] = tmp
        super().__init__(exprs, savelist)
        if self.exprs:
            # the sequence can only match empty if every member can
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            if not isinstance(self.exprs[0], White):
                # adopt the whitespace handling of the leading expression
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                # a leading White expression must see the whitespace itself
                self.skipWhitespace = False
        else:
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Collapse pending skips, streamline via the superclass, hook up any
        :class:`IndentedBlock` to its preceding expression, recompute
        ``mayReturnEmpty`` and return ``self``.
        """
        # collapse any _PendingSkip's
        if self.exprs and any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in self.exprs[:-1]
        ):
            # sentinel written over entries that have been merged into their predecessor
            deleted_expr_marker = NoMatch()
            for i, e in enumerate(self.exprs[:-1]):
                # NOTE(review): the loop walks a snapshot (self.exprs[:-1]), so a
                # marker written into self.exprs[i + 1] is never the `e` seen
                # here - confirm whether this check can ever fire
                if e is deleted_expr_marker:
                    continue
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    # fold the following expression into the trailing _PendingSkip
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # when prev matches, record its column as the block's parent_anchor;
                    # cur is bound as a default argument so the action keeps this block
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                # descend only into the first sub-expression, if there is one
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order and concatenate the tokens.

        Once an ``_ErrorStop`` marker has been passed, a failure of any later
        expression is raised as :class:`ParseSyntaxException`.

        :returns: ``(loc, resultlist)`` - the location after the last
            expression and the accumulated results
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # exact type comparison is used here rather than isinstance()
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    # drop the traceback and re-raise as a syntax exception
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    # running off the end of the input is reported at end of string
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support ``expr += other``: strings are converted with
        ``_literalStringClass``; anything that is not a ``ParserElement``
        yields ``NotImplemented``; otherwise returns ``self.append(other)``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        """Run the recursion check on contained expressions, stopping after
        the first one whose ``mayReturnEmpty`` is false.
        """
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        """Return the space-joined names of the contained expressions wrapped
        in a single pair of braces.
        """
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the extended slice picks out just the first and last characters)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Store the alternatives; the expression may match empty if any
        alternative may, and skips whitespace only if all alternatives do.
        """
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self.mayReturnEmpty = True

    def streamline(self) -> ParserElement:
        """Streamline via the superclass, then recompute ``mayReturnEmpty``,
        ``saveAsList`` and ``skipWhitespace`` from the alternatives.
        """
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            # a White alternative disables whitespace skipping for the whole Or
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and return the result of the longest match.

        First pass: each alternative is probed with ``try_parse`` and the end
        location of every success is recorded. Second pass: the successes are
        re-parsed for real, longest first.

        :returns: ``(loc, tokens)`` of the selected alternative
        :raises ParseFatalException: the recorded fatal with the greatest
            ``loc``, if no alternative is selected and any fatal was recorded
        :raises ParseException: if no alternative matches
        """
        maxExcLoc = -1
        maxException = None
        matches = []
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error discards any ordinary failure recorded so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # ordinary failures are ignored once a fatal has been recorded
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, doActions)

            # (end location, tokens) of the best re-parse seen so far
            longest = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    # matched at least as much as the probe did: take it immediately
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest fatal first; ties are broken in favor of the
                # element with the longer string representation
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            if maxExcLoc == loc:
                maxException.msg = self.errmsg
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Support ``expr ^= other``: strings are converted with
        ``_literalStringClass``; anything that is not a ``ParserElement``
        yields ``NotImplemented``; otherwise returns ``self.append(other)``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        """Return the alternatives joined by ``" ^ "`` and wrapped in braces."""
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, listAllMatches=False):
        """Attach a results name via the superclass, first warning when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled
        (and not suppressed) and some alternative is an :class:`And`.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, listAllMatches)
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Store the alternatives and derive ``mayReturnEmpty`` / ``skipWhitespace`` from them."""
        super().__init__(exprs, savelist)
        if not self.exprs:
            # no alternatives at all
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once (a no-op when already streamlined) and refresh the
        flags derived from the alternatives; returns ``self``.
        """
        if not self.streamlined:
            super().streamline()
            if not self.exprs:
                self.saveAsList = False
                self.mayReturnEmpty = True
            else:
                self.saveAsList = any(e.saveAsList for e in self.exprs)
                self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
                # any White alternative turns whitespace skipping off
                self.skipWhitespace = all(
                    e.skipWhitespace and not isinstance(e, White) for e in self.exprs
                )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        Fatal exceptions propagate immediately (tagged with the failing
        alternative). Otherwise the failure with the greatest location is
        raised once every alternative has been tried.
        """
        furthest_loc = -1
        furthest_exc = None

        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except ParseFatalException as fatal:
                fatal.__traceback__ = None
                fatal.parser_element = alternative
                raise
            except ParseException as failure:
                if failure.loc > furthest_loc:
                    furthest_exc, furthest_loc = failure, failure.loc
            except IndexError:
                end_of_input = len(instring)
                if end_of_input > furthest_loc:
                    furthest_exc = ParseException(
                        instring, end_of_input, alternative.errmsg, self
                    )
                    furthest_loc = end_of_input

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # every alternative failed right at the starting position, so report
        # the collective message rather than one alternative's message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Support ``expr |= other``; non-parser operands yield ``NotImplemented``."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        """Return the alternatives joined by ``" | "`` and wrapped in braces."""
        joined = " | ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

    def _setResultsName(self, name, listAllMatches=False):
        """Attach a results name via the superclass, warning first when the
        named-alternation diagnostic is active and an alternative is an ``And``.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warnings.warn(
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group",
                stacklevel=3,
            )

        return super()._setResultsName(name, listAllMatches)
class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        """Store the expressions; the grouping into required/optional sets is
        deferred to the first call of :meth:`parseImpl` (``initExprGroups``).
        """
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Support ``expr &= other``: strings are converted with
        ``_literalStringClass``; anything that is not a ``ParserElement``
        yields ``NotImplemented``; otherwise returns ``self.append(other)``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Each([self, other])

    def streamline(self) -> ParserElement:
        """Streamline via the superclass and recompute ``mayReturnEmpty``."""
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match all required expressions (and any optional ones) in any order.

        A discovery phase probes the remaining expressions with ``try_parse``
        repeatedly, recording the order in which they match, until a full pass
        matches nothing. The expressions are then parsed for real, in that
        order, starting again from ``loc``.

        :returns: ``(loc, total_results)``
        :raises ParseFatalException: the fatal with the greatest ``loc``
            recorded during the final discovery pass
        :raises ParseException: if any required expression was never matched
        """
        if self.initExprGroups:
            # maps id() of the expression wrapped by each Opt back to the Opt itself
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        # work on copies so the cached groups survive for later calls
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        multis = self.multioptionals[:]
        matchOrder = []

        keepMatching = True
        failed = []
        fatals = []
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + multis
            # both lists are reset on every pass, so only the last pass is reported
            failed.clear()
            fatals.clear()
            for e in tmpExprs:
                try:
                    tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the enclosing Opt (if any) rather than its inner expression
                    matchOrder.append(self.opt1map.get(id(e), e))
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a complete pass matched nothing
            if len(failed) == len(tmpExprs):
                keepMatching = False

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest fatal first; ties are broken in favor of the
                # element with the longer string representation
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if tmpReqd:
            missing = ", ".join([str(e) for e in tmpReqd])
            # pass self so the exception records which element failed
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
                self,
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt]

        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        """Return the expressions joined by ``" & "`` and wrapped in braces."""
        return f"{{{' & '.join(str(e) for e in self.exprs)}}}"
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        """Wrap ``expr`` (converting a string to a parser element first) and
        copy its parsing flags, whitespace settings and ignore expressions.
        """
        super().__init__(savelist)
        if isinstance(expr, str_type):
            literal_cls = self._literalStringClass
            text = typing.cast(str, expr)
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        wrapped = typing.cast(ParserElement, expr)
        self.expr = wrapped
        if wrapped is None:
            return

        self.mayIndexError = wrapped.mayIndexError
        self.mayReturnEmpty = wrapped.mayReturnEmpty
        self.set_whitespace_chars(
            wrapped.whiteChars, copy_defaults=wrapped.copyDefaultWhiteChars
        )
        self.skipWhitespace = wrapped.skipWhitespace
        self.saveAsList = wrapped.saveAsList
        self.callPreparse = wrapped.callPreparse
        self.ignoreExprs.extend(wrapped.ignoreExprs)

    def recurse(self) -> List[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if there is none."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate parsing to the wrapped expression.

        On failure the exception's message is replaced by this element's
        ``errmsg`` (when non-empty), except for a :class:`Forward` that has no
        custom name; the exception is then re-raised.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        except ParseBaseException as failure:
            may_relabel = not isinstance(self, Forward) or self.customName is not None
            if may_relabel and self.errmsg:
                failure.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``leave_whitespace`` here and, when ``recursive``, to a copy
        of the wrapped expression that replaces the original.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``ignore_whitespace`` here and, when ``recursive``, to a copy
        of the wrapped expression that replaces the original.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignore expression here and on the wrapped
        expression; a ``Suppress`` already present in ``ignoreExprs`` is skipped.
        """
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            if self.expr is not None:
                # forward whatever the superclass actually stored
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then the wrapped expression; returns ``self``."""
        super().streamline()
        wrapped = self.expr
        if wrapped is not None:
            wrapped.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element is already in
        ``parseElementList``; otherwise continue the check in the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        extended_path = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(extended_path)

    def validate(self, validateTrace=None) -> None:
        """Deprecated: validate the wrapped expression, then run the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        trace_with_self = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace_with_self)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<ClassName>:(<wrapped expression>)"``."""
        class_name = type(self).__name__
        return f"{class_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Zero-width check that the current column equals ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width check that the current column is greater than ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        """
        :param expr: expression to match on each line of the block
        :param recursive: when true, each line may be followed by a nested,
            more deeply indented block of the same expression
        :param grouped: when true, the block's tokens are wrapped in a ``Group``
        """
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing construct; initialised to 1 here
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        """Build the block grammar for the indentation found at ``loc`` and parse with it."""
        # skip ahead to the first non-whitespace position using a throwaway
        # Empty(); its column is the one every line of this block must use
        block_start = Empty().preParse(instring, loc)

        # probe the wrapped expression here first - if it cannot match, this
        # raises and there is nothing more to do
        self.expr.try_parse(instring, block_start, do_actions=doActions)

        block_col = col(block_start, instring)
        same_indent = self._Indent(block_col)

        line_expr = Empty() + same_indent + self.expr
        if self._recursive:
            deeper_indent = self._IndentGreater(block_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = block_col
            line_expr += Opt(deeper_indent + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{block_col}")
        block = OneOrMore(line_expr)

        # after the block, optionally consume a return to the parent's column
        # (or the end of the input)
        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, block_start, doActions
        )
class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # parseImpl compares loc against 0, so it must see the un-preparsed location
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, but only at location 0.

        :raises ParseException: if ``loc`` is not 0 (the exception carries
            this element as its parser element)
        """
        if loc != 0:
            raise ParseException(instring, loc, "not found at string start", self)
        return super().parseImpl(instring, loc, doActions)
class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # parseImpl checks the column of loc, so it must see the un-preparsed location
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, but only when ``loc`` is in column 1.

        :raises ParseException: if ``col(loc, instring)`` is not 1 (the
            exception carries this element as its parser element)
        """
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, "not found at line start", self)
        return super().parseImpl(instring, loc, doActions)
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression at ``loc`` without consuming input.

        :returns: the unchanged ``loc`` and the lookahead's results object
            with its token list emptied
        """
        # parse with the wrapped expression, then empty the token list of the
        # returned ParseResults in place - the same results object is handed
        # back, so named results defined in the lookahead remain accessible
        _end, lookahead_results = self.expr._parse(instring, loc, doActions=doActions)
        del lookahead_results[:]

        return loc, lookahead_results
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        # calling the expression with no arguments presumably yields a copy, so
        # leave_whitespace() does not alter the caller's expression - confirm
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # exact is True when the lookbehind distance is inferred from expr
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the token list of the lookbehind result
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Verify that the wrapped expression matches immediately before ``loc``.

        :returns: the unchanged ``loc`` and the results of the lookbehind match
        :raises ParseException: if no lookbehind match is found
        :raises TypeError: if no ``retreat`` was given and none could be
            inferred from the expression
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
            return loc, ret

        if self.retreat is None:
            # fail with an explanation instead of an opaque "int - NoneType" error
            raise TypeError(
                "PrecededBy requires a 'retreat' value when the lookbehind"
                " expression has no fixed or maximum length"
            )

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        instring_slice = instring[max(0, loc - self.retreat) : loc]
        last_exc = ParseException(instring, loc, self.errmsg, self)

        # NOTE(review): when loc > retreat the final offset (retreat + 1) gives
        # a start position of -1 in the slice - confirm this is intended
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
            except ParseBaseException as pbe:
                last_exc = pbe
            else:
                break
        else:
            # no offset produced a match: re-raise the most recent failure
            raise last_exc

        return loc, ret
class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression and return ``[start, tokens, end]``
        with the names ``locn_start``, ``value`` and ``locn_end`` attached.
        """
        end, value = self.expr._parse(instring, loc, doActions, callPreParse=False)
        located = ParseResults([loc, value, end])
        for key, item in (
            ("locn_start", loc),
            ("value", value),
            ("locn_end", end),
        ):
            located[key] = item
        # when this element itself is named, wrap in a list so that the name
        # is attached to the complete group
        return end, ([located] if self.resultsName else located)
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # set the flag directly instead of calling self.leave_whitespace(),
        # which would propagate the setting to the wrapped expression
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        unwanted = self.expr
        self.errmsg = f"Found unwanted token, {unwanted}"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens unless the wrapped expression can parse at ``loc``."""
        unwanted_present = self.expr.can_parse_next(
            instring, loc, do_actions=doActions
        )
        if not unwanted_present:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        """Return ``"~{<wrapped expression>}"``."""
        unwanted = self.expr
        return f"~{{{unwanted}}}"
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repetition: match the contained expression
    one or more times, optionally stopping before a sentinel expression."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        sentinel = stopOn or stop_on
        self.saveAsList = True
        # the stopOn() method converts a string sentinel to a literal itself
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the terminating sentinel; returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            # stored negated, so that a matching sentinel raises at parse time
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression repeatedly, starting at ``loc``.

        The first match is mandatory, so a failure there propagates to the
        caller; failures on later repetitions simply end the loop.
        """
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        not_ender = self.not_ender
        reject_ender = None if not_ender is None else not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_one(instring, loc, doActions)

        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                start = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_one(instring, start, doActions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # no further repetition matched - keep what was collected so far
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Assign a results name, first warning (when that diagnostic is
        enabled and not suppressed) if a contained expression already has
        a results name of its own."""
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            candidates = [self.expr] + self.expr.recurse()
            # first contained expression that is named and has not
            # suppressed this diagnostic, if any
            named = next(
                (
                    e
                    for e in candidates
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and diag not in e.suppress_warnings_
                ),
                None,
            )
            if named is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {named.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, listAllMatches)
class OneOrMore(_MultipleMatch):
    """
    Match the given expression one or more times in a row.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        # rendered as "{expr}..."
        return "{" + str(self.expr) + "}..."
class ZeroOrMore(_MultipleMatch):
    """
    Match the given expression any number of times, including not at all.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Run the one-or-more match; if even the first repetition fails,
        succeed with an empty result at the original location."""
        try:
            outcome = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        # rendered as "[expr]..."
        return "[" + str(self.expr) + "]..."
class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        ``min`` and ``max`` bound the number of list elements; ``min`` must
        be at least 1 and ``max`` may not be smaller than ``min``
        (``ValueError`` is raised otherwise).

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        element = expr
        if isinstance(element, str_type):
            element = ParserElement._literalStringClass(element)
        element = typing.cast(ParserElement, element)

        # validate the requested bounds before building anything
        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = element
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters stay in the output only when combining into one string
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # first element, then (delimiter + element) repeated min-1 .. max-1 times
        most_repeats = None if self.max is None else self.max - 1
        repeated_tail = (self.delim + self.content) * (self.min - 1, most_repeats)
        delim_list_expr = self.content + repeated_tail

        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        content_expr = self.content.streamline()
        return "{0} [{1} {0}]...".format(content_expr, self.raw_delim)
5115class _NullToken:
5116 def __bool__(self):
5117 return False
5119 def __str__(self):
5120 return ""
class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the contained expression; if it does not match, succeed at
        ``loc`` with the default value (or with no tokens when no default
        was supplied)."""
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif inner.resultsName:
                # keep the default reachable under the inner expression's name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        label = str(self.expr)
        # strip off redundant inner {}'s
        while len(label) > 1 and label.startswith("{") and label.endswith("}"):
            label = label[1:-1]
        return "[" + label + "]"
# Alias: ``Optional`` is another name bound to the ``Opt`` class.
Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)
        # helper element whose only job is to carry the ignore expressions
        # that are skipped over while scanning for the target
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's ignore
        expressions plus the ``ignore`` argument given at construction."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register an ignorable expression and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the new location and a ``ParseResults`` holding the skipped
        text (followed by the target's tokens when ``include`` was set).
        Raises ``ParseException`` if the end of the string is passed
        without the target matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_can_parse_next = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_can_parse_next is not None:
                # break if failOn expression matches
                if self_failOn_can_parse_next(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: actions are not run while searching
                self_expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honoring doActions
            loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # remember where this Forward was created, for the warning in __del__
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        self.lshift_line = None

    def __lshift__(self, other) -> "Forward":
        """Attach ``other`` as the contained expression and copy its parsing
        attributes onto this element; returns ``self``.

        Returns ``NotImplemented`` (leaving this element untouched) when
        ``other`` is neither a string nor a ``ParserElement``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Drop the creation frame only once the operand has been accepted:
        # __del__ reads caller_frame whenever expr is still None, so it must
        # survive a rejected '<<'.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # remembered so that __or__ can detect "fwd << a | b" on one line
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> "Forward":
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> "ParserElement":
        """Build the alternation as usual, warning first when this ``|`` is on
        the same source line as the preceding ``<<`` (and that diagnostic is
        enabled and not suppressed)."""
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, doActions=True):
        """Parse using the attached expression.

        Emits a warning when no expression was ever attached (if that
        diagnostic is enabled and not suppressed).  When left recursion
        support is disabled this simply delegates to the base class;
        otherwise the bounded recursion algorithm described below is used.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, doActions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `doActions=False` and only run `doActions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, doActions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `doActions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if doActions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match – do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if doActions:
                            # replace the match for doActions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, prev_result.copy()
                        del memo[peek_key]
                        return prev_loc, prev_peek.copy()
                    # the match did get better: see if we can improve further
                    if doActions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        # the flag is set before descending, so a grammar that refers back to
        # this element does not streamline it a second time
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        retString = "..."
        try:
            if self.expr is not None:
                retString = str(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            # returning from ``finally`` means a name is produced even if
            # str(self.expr) raised
            return f"{type(self).__name__}: {retString}"

    def copy(self) -> ParserElement:
        """Copy this element; an empty ``Forward`` is copied as a new
        ``Forward`` that wraps the original."""
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False):
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but not forwarded to the base class
        super().__init__(expr)
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter that joins all matching tokens into a single string.

    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the camelCase keyword wins when it is given explicitly
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register an ignorable expression; returns ``self``.

        When ``adjacent`` is set, ``ParserElement.ignore`` is called
        directly rather than the inherited override.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one string built from them."""
        joined = "".join(tokenlist._asStringList(self.joinString))

        # start from a copy, then swap its list contents for the joined string
        combined = tokenlist.copy()
        del combined[:]
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
class Group(TokenConverter):
    """Converter that returns the matched tokens nested as a single list -
    useful for returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore`
    expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper."""
        if not self._asPythonList:
            return [tokenlist]

        # plain-list mode: convert the tokens to a Python list first
        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.asList()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty entry of ``tokenlist`` under its first
        token as key, then return the tokens (or their ``as_dict()`` form)."""
        for position, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if size == 1:
                # key only, no value
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    exc = TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    )
                    raise exc from None

                # everything after the key is the value
                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result
class Suppress(TokenConverter):
    """Converter that matches an expression but discards its tokens.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)``: stand in a pending skip until the following
        # expression is supplied through ``+`` or ``-``
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        # pending skip: suppress everything up to ``other``
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        # pending skip: suppress everything up to ``other``
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # the match counts, but contributes no tokens
        return []

    def suppress(self) -> ParserElement:
        # already suppressed
        return self
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator that traces calls to a parse action on ``sys.stderr``.

    Before the wrapped parse action runs, a line of the form
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written. After it finishes, ``"<<leaving method-name"`` is written,
    followed either by the returned value or by the exception that the
    parse action raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*pa_args):
        label = f.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = pa_args[-3:]
        if len(pa_args) > 3:
            # an extra leading argument is present: prefix the label with
            # the name of its type
            label = f"{type(pa_args[0]).__name__}.{label}"

        sys.stderr.write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*pa_args)
        except Exception as exc:
            sys.stderr.write(f"<<leaving {label} (exception: {exc})\n")
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    # report the wrapped action's name rather than "z"
    z.__name__ = f.__name__
    return z
# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Mini-grammar for the regex-style bracket expressions parsed by srange().

# backslash-escaped punctuation (e.g. ``\-`` or ``\]``): yields the character
# that follows the backslash
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape, ``\x##`` or ``\0x##`` (upper- or lower-case "x").
# The digits are everything after the "x" marker.  str.lstrip(r"\0x") must not
# be used for this: it strips a *set* of characters, so it also eats the
# leading zeros of the value (``\x00`` -> "") and does not remove an
# upper-case "X", both of which make int() raise.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# octal escape, ``\0##``: everything after the backslash is read as base 8
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one character of the set: an escape, or any single char other than ``\`` / ``]``
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range; the dash is suppressed so the group holds the two endpoints
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# full bracket expression: "[" + optional "^" (named "negate") + items (named "body") + "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
def srange(s: str) -> str:
    r"""Expand a regex-style ``'[]'`` character-set definition into the
    string of all characters it denotes, for use when constructing a
    :class:`Word`::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s. The items between the
    brackets may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, an empty string is
    returned instead of raising.
    """

    def _expand(part):
        # a parsed range arrives as a ParseResults holding its two endpoints;
        # anything else is a single, already-decoded character
        if isinstance(part, ParseResults):
            first, last = ord(part[0]), ord(part[1])
            return "".join(chr(code) for code in range(first, last + 1))
        return part

    try:
        parsed = _reBracketExpr.parse_string(s)
        return "".join(map(_expand, parsed.body))
    except Exception:
        # best effort: any failure yields an empty character set
        return ""
def token_map(func, *args) -> ParseAction:
    """Build a parse action that applies ``func`` to every token of a
    :class:`ParseResults` list. Any additional ``args`` are passed to
    ``func`` after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action takes ``(s, l, t)`` and returns a list of the
    converted tokens; its ``__name__`` is taken from ``func`` (or from
    ``func``'s class when ``func`` has no ``__name__``).

    Example (compare the last to the example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        converted = []
        for token in t:
            converted.append(func(token, *args))
        return converted

    # callables without a __name__ (e.g. callable instances) fall back to
    # the name of their class
    class_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", class_name)

    return pa
def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Looks at the local variables of the calling frame and, for every
    :class:`ParserElement` that has no custom name yet, sets its name to
    the name of the variable it is bound to.
    """
    caller = sys._getframe().f_back
    if caller is None:
        # no calling frame to inspect
        return

    caller = typing.cast(types.FrameType, caller)
    for var_name, value in caller.f_locals.items():
        if not isinstance(value, ParserElement):
            continue
        if value.customName:
            # already explicitly named - leave it alone
            continue
        value.set_name(var_name)
# Quoted-string expressions.
# Each pattern matches the opening quote and the string body; the closing
# quote is matched by the literal that follows the Regex.  The body allows:
#   - any character except the quote, newline, carriage return or backslash
#   - a doubled quote character ("" or '')
#   - a backslash followed by a non-"x" character, or by "x" and hex digits
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either of the two forms above; built from fresh Regex objects rather than
# from dbl_quoted_string / sgl_quoted_string themselves
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style strings: triple-quoted forms plus single-line forms.  In the
# single-line forms a backslash-escaped quote (\" or \') replaces the doubled
# quote accepted by quoted_string.  The alternatives are joined with ``^``
# here, not ``|`` as in quoted_string.
# NOTE(review): re.MULTILINE only changes how ``^`` and ``$`` match, and the
# triple-quoted patterns use neither, so ``\\.`` still does not match a
# backslash followed by a newline - confirm whether re.DOTALL was intended.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted string with a leading "u"; works on a copy of quoted_string
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# 8-bit character sets, written with the backward-compatible ``\0x##`` hex
# escapes of srange(): 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff for alphas8bit;
# 0xa1-0xbf, 0xd7 and 0xf7 for punc8bit
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of the ParserElement instances present in this namespace at this
# point; names bound later are not picked up)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# backward compatibility names
# camelCase spellings of the snake_case names.  The expression constants are
# plain aliases bound to the very same objects; the function names are
# produced by replaced_by_pep8 (imported from .util), which is given the old
# name and the new function - presumably a compatibility wrapper, see its
# definition for the exact behavior.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on