Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 46%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# core.py
3#
4from __future__ import annotations
6import collections.abc
7from collections import deque
8import os
9import typing
10from typing import (
11 Any,
12 Callable,
13 Generator,
14 NamedTuple,
15 Sequence,
16 TextIO,
17 Union,
18 cast,
19)
20from abc import ABC, abstractmethod
21from enum import Enum
22import string
23import copy
24import warnings
25import re
26import sys
27from collections.abc import Iterable
28import traceback
29import types
30from operator import itemgetter
31from functools import wraps
32from threading import RLock
33from pathlib import Path
35from .warnings import PyparsingDeprecationWarning, PyparsingDiagnosticWarning
36from .util import (
37 _FifoCache,
38 _UnboundedCache,
39 __config_flags,
40 _collapse_string_to_ranges,
41 _convert_escaped_numerics_to_char,
42 _escape_regex_range_chars,
43 _flatten,
44 LRUMemo as _LRUMemo,
45 UnboundedMemo as _UnboundedMemo,
46 deprecate_argument,
47 replaced_by_pep8,
48)
49from .exceptions import *
50from .actions import *
51from .results import ParseResults, _ParseResultsWithOffset
52from .unicode import pyparsing_unicode
54_MAX_INT = sys.maxsize
55str_type: tuple[type, ...] = (str, bytes)
57#
58# Copyright (c) 2003-2022 Paul T. McGuire
59#
60# Permission is hereby granted, free of charge, to any person obtaining
61# a copy of this software and associated documentation files (the
62# "Software"), to deal in the Software without restriction, including
63# without limitation the rights to use, copy, modify, merge, publish,
64# distribute, sublicense, and/or sell copies of the Software, and to
65# permit persons to whom the Software is furnished to do so, subject to
66# the following conditions:
67#
68# The above copyright notice and this permission notice shall be
69# included in all copies or substantial portions of the Software.
70#
71# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
72# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
73# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
74# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
75# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
76# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
77# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
78#
80from functools import cached_property
class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # short label for this group of flags
    # (presumably used by the __config_flags base class in its messages - verify in util)
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # Snapshot of every public name defined in the class body so far. This must
    # stay BELOW the flag definitions, since it is computed from locals().
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # Whitespace-separated flag names, split into a list
    # (presumably flags the base class refuses to change - verify in util).
    _fixed_names = """
        collect_all_And_tokens
        """.split()
class __diag__(__config_flags):
    """
    Internal holder for the global diagnostic flags; every flag defaults to
    ``False``. The public names for these flags are the members of the
    :class:`Diagnostics` enum, and the module-level functions ``enable_diag``,
    ``disable_diag`` and ``enable_all_warnings`` toggle them by name.
    """

    # short label for this group of flags
    # (presumably used by the __config_flags base class in its messages - verify in util)
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # Snapshot of every public name defined in the class body so far. This must
    # stay BELOW the flag definitions, since it is computed from locals().
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # flags grouped by name prefix
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag whose name starts with ``warn``."""
        for name in cls._warning_names:
            cls.enable(name)
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.
    """

    # Member names must match the attribute names of the __diag__ flags class,
    # because enable_diag/disable_diag look the flag up by ``member.name``.
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    # NOTE(review): this member has no entry in the docstring above; presumably it
    # warns about mixing '<<' with '|' on a Forward - confirm and document.
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Enable a global pyparsing diagnostic flag (see :class:`Diagnostics`).

    :param diag_enum: the :class:`Diagnostics` member to turn on; its ``name``
      selects the flag of the same name on the internal ``__diag__`` class
    """
    __diag__.enable(diag_enum.name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Disable a global pyparsing diagnostic flag (see :class:`Diagnostics`).

    :param diag_enum: the :class:`Diagnostics` member to turn off; its ``name``
      selects the flag of the same name on the internal ``__diag__`` class
    """
    __diag__.disable(diag_enum.name)
def enable_all_warnings() -> None:
    """
    Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).

    Only the ``warn...`` flags are affected; ``enable_debug...`` flags are left as they are.
    """
    __diag__.enable_all_warnings()
# hide abstract class: the flag base class was only needed to define
# __compat__ and __diag__ above, so drop it from this module's namespace
del __config_flags
def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether pyparsing's diagnostic warnings should be switched on.

    The starting answer is the truthiness of ``warn_env_var``. Each entry of
    ``cmd_line_warn_options`` (in ``-W`` format,
    ``action:message:category:module:line``) is then applied in order, and a
    later entry overrides an earlier one:

    - a non-"ignore" action enables warnings if the option is unqualified
      (no message, category or module) or names the ``pyparsing`` module
    - an "ignore" action disables warnings if the module is ``pyparsing`` or empty

    :param cmd_line_warn_options: warning option strings, e.g. ``sys.warnoptions``
    :param warn_env_var: value of the enabling environment variable, or ``None``
    :return: ``True`` if warnings should be enabled
    """
    enabled = True if warn_env_var else False

    for option in cmd_line_warn_options:
        # pad with separators so that short options still yield every field
        fields = (option + "::::").split(":")
        action, message, category, module = fields[:4]

        if action.lower().startswith("i"):
            # an "ignore" action
            if module in ("pyparsing", ""):
                enabled = False
        elif module == "pyparsing" or not (message or category or module):
            enabled = True

    return enabled
# At import time, turn on all pyparsing warnings if the interpreter's -W options
# or the PYPARSINGENABLEALLWARNINGS environment variable ask for it.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()
# build list of single arg builtins, that can be used as parse actions
# (_trim_arity calls these with just the parsed tokens)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# (new location, parsed tokens) pair returned by parseImpl
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition function may accept 0 to 3 of the arguments (s, loc, toks)
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# called as fn(s, loc, expr, err) - see ParserElement.set_fail_action
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug callbacks; the trailing bool is the cache_hit flag
# (signatures mirror the _default_*_debug_action functions in this module)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# commonly used character sets, as plain strings
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])
class _ParseActionIndexError(Exception):
    """
    Internal carrier for an IndexError that escaped from a user parse action.

    It deliberately does not derive from IndexError, so that code which
    treats an IndexError from a ``parseImpl`` method as "ran off the end of
    the input" does not mistake a parse action's IndexError for that case.

    :param msg: description of where the error occurred
    :param exc: the original exception raised by the parse action
    """

    def __init__(self, msg: str, exc: BaseException) -> None:
        # keep the original exception so a caller can inspect or re-raise it
        self.exc: BaseException = exc
        self.msg: str = msg
# Cached on the first call to _trim_arity:
# - _trim_arity_call_line: stack entry for the extract_stack() line inside _trim_arity
# - pa_call_line_synth: (filename, line number) of the ``ret = func(...)`` call
#   inside ``wrapper``, computed from the entry above plus LINE_DIFF
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is called with the full parse action arguments and
    works out how many of them ``func`` accepts. It first calls ``func`` with
    all arguments; each time that call itself raises TypeError, it drops one
    more leading argument and retries, up to ``max_limit`` dropped arguments.
    Once a call succeeds, the number of dropped arguments is remembered and
    used for all later calls.

    Builtins listed in ``_single_arg_builtins`` skip this detection and are
    called with the tokens argument only.

    A TypeError raised from inside ``func`` (rather than by the call itself)
    is re-raised unchanged. An IndexError raised by ``func`` is wrapped in
    ``_ParseActionIndexError``.

    :param func: the user's parse action
    :param max_limit: maximum number of leading arguments to drop
    :return: wrapper callable carrying ``func``'s name and docstring
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments to drop, and whether that number is settled
    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # Look at the last of the first two traceback frames: if the
                    # TypeError came from the call itself (wrong argument count),
                    # that frame is this wrapper at the synthesized call line;
                    # if it came from inside func, it is a frame in func instead.
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            # drop one more leading argument and try again
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate function (one returning ``True`` or ``False``) as a parse
    action that raises when the predicate fails. Useful where a parse action
    is required and :meth:`ParserElement.add_condition` cannot be used (such as
    when adding a condition to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    :param message: define a custom message to be used in the raised exception;
                    defaults to ``"failed user-defined condition"``
    :param fatal: if ``True``, will raise :class:`ParseFatalException`
                  to stop parsing immediately;
                  otherwise will raise :class:`ParseException`

    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message

    exc_type = ParseException
    if fatal:
        exc_type = ParseFatalException

    # let the predicate accept anywhere from 0 to 3 of (s, l, t)
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if bool(fn(s, l, t)):
            return
        raise exc_type(s, l, msg)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "about to try a match" debug action: print the expression, the
    location (with line and column), the source line, and a caret placed
    under the column being matched. A leading ``*`` marks a cache hit.
    """
    marker = "*" if cache_hit else ""
    headline = f"{marker}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
    source_line = f" {line(loc, instring)}\n"
    caret_line = f" {'^':>{col(loc, instring)}}"
    print(headline + source_line + caret_line)
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default "match succeeded" debug action: print the expression and the
    matched tokens as a list. A leading ``*`` marks a cache hit.
    """
    marker = "*" if cache_hit else ""
    matched = toks.as_list()
    print(f"{marker}Matched {expr} -> {matched}")
def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default "match failed" debug action: print the expression together with
    the type name and text of the exception. A leading ``*`` marks a cache hit.
    """
    marker = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print(f"{marker}Match {expr} failed, {exc_name} raised: {exc}")
def null_debug_action(*args):
    """No-op debug action: accepts any positional arguments and does nothing.

    Pass it as a debug action to suppress that piece of debugging output during parsing.
    """
    return None
393class ParserElement(ABC):
394 """Abstract base level parser element class."""
396 DEFAULT_WHITE_CHARS: str = " \n\t\r"
397 verbose_stacktrace: bool = False
398 _literalStringClass: type = None # type: ignore[assignment]
400 @staticmethod
401 def set_default_whitespace_chars(chars: str) -> None:
402 r"""
403 Overrides the default whitespace chars
405 Example:
407 .. doctest::
409 # default whitespace chars are space, <TAB> and newline
410 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
411 ParseResults(['abc', 'def', 'ghi', 'jkl'], {})
413 # change to just treat newline as significant
414 >>> ParserElement.set_default_whitespace_chars(" \t")
415 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
416 ParseResults(['abc', 'def'], {})
418 # Reset to default
419 >>> ParserElement.set_default_whitespace_chars(" \n\t\r")
420 """
421 ParserElement.DEFAULT_WHITE_CHARS = chars
423 # update whitespace all parse expressions defined in this module
424 for expr in _builtin_exprs:
425 if expr.copyDefaultWhiteChars:
426 expr.whiteChars = set(chars)
    @staticmethod
    def inline_literals_using(cls: type) -> None:
        """
        Set class to be used for inclusion of string literals into a parser.

        The class is stored on :class:`ParserElement` itself, so the setting is
        global: it affects expressions built after the call, not ones that
        already exist (note that ``date_str`` is rebuilt in the example below).

        :param cls: class used to wrap plain strings that are combined with
          parser elements

        Example:

        .. doctest::
            :options: +NORMALIZE_WHITESPACE

            # default literal class used is Literal
            >>> integer = Word(nums)
            >>> date_str = (
            ...     integer("year") + '/'
            ...     + integer("month") + '/'
            ...     + integer("day")
            ... )

            >>> date_str.parse_string("1999/12/31")
            ParseResults(['1999', '/', '12', '/', '31'],
            {'year': '1999', 'month': '12', 'day': '31'})

            # change to Suppress
            >>> ParserElement.inline_literals_using(Suppress)
            >>> date_str = (
            ...     integer("year") + '/'
            ...     + integer("month") + '/'
            ...     + integer("day")
            ... )

            >>> date_str.parse_string("1999/12/31")
            ParseResults(['1999', '12', '31'],
            {'year': '1999', 'month': '12', 'day': '31'})

            # Reset
            >>> ParserElement.inline_literals_using(Literal)
        """
        ParserElement._literalStringClass = cls
467 @classmethod
468 def using_each(cls, seq, **class_kwargs):
469 """
470 Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq.
472 Example:
474 .. testcode::
476 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")
478 .. versionadded:: 3.1.0
479 """
480 yield from (cls(obj, **class_kwargs) for obj in seq)
    class DebugActions(NamedTuple):
        # Optional callbacks used while debugging an expression; a field left
        # as None means there is no callback for that event.
        # called before a match is attempted
        debug_try: typing.Optional[DebugStartAction]
        # called after a successful match
        debug_match: typing.Optional[DebugSuccessAction]
        # called when matching (or a parse action) raises
        debug_fail: typing.Optional[DebugExceptionAction]
    def __init__(self, savelist: bool = False) -> None:
        """
        Initialize the state shared by all parser elements.

        :param savelist: initial value of ``saveAsList``, which is passed as
          ``aslist`` when this element's :class:`ParseResults` are built
        """
        # callables run (in order) on the tokens of a successful match
        self.parseAction: list[ParseAction] = list()
        # optional callable run when matching this element fails
        self.failAction: typing.Optional[ParseFailAction] = None
        self.customName: str = None  # type: ignore[assignment]
        self._defaultName: typing.Optional[str] = None
        # name under which matched tokens are stored in the results
        self.resultsName: str = None  # type: ignore[assignment]
        self.saveAsList: bool = savelist
        # whether preParse skips leading characters found in whiteChars
        self.skipWhitespace: bool = True
        self.whiteChars: set[str] = set(ParserElement.DEFAULT_WHITE_CHARS)
        # if True, this element follows changes to the default whitespace chars
        self.copyDefaultWhiteChars: bool = True
        # used when checking for left-recursion
        self._may_return_empty: bool = False
        self.keepTabs: bool = False
        # expressions that preParse skips over before matching
        self.ignoreExprs: list[ParserElement] = list()
        self.debug: bool = False
        self.streamlined: bool = False
        # optimize exception handling for subclasses that don't advance parse index
        self.mayIndexError: bool = True
        self.errmsg: Union[str, None] = ""
        # mark results names as modal (report only last) or cumulative (list all)
        self.modalResults: bool = True
        # custom debug actions
        self.debugActions = self.DebugActions(None, None, None)
        # avoid redundant calls to preParse
        self.callPreparse: bool = True
        # if True, parse actions also run when parsing with do_actions=False
        self.callDuringTry: bool = False
        # diagnostics silenced for this element via suppress_warning()
        self.suppress_warnings_: list[Diagnostics] = []
        self.show_in_diagram: bool = True
    @property
    def mayReturnEmpty(self) -> bool:
        """
        Legacy read access to the ``_may_return_empty`` flag.

        .. deprecated:: 3.3.0
           use _may_return_empty instead.
        """
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value) -> None:
        """
        Legacy write access to the ``_may_return_empty`` flag.

        .. deprecated:: 3.3.0
           use _may_return_empty instead.
        """
        self._may_return_empty = value
    def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
        """
        Suppress warnings emitted for a particular diagnostic on this expression.

        :param warning_type: the :class:`Diagnostics` member to silence for this element
        :return: this element itself, so that calls can be chained

        Example:

        .. doctest::

            >>> label = pp.Word(pp.alphas)

            # Normally using an empty Forward in a grammar
            # would print a warning, but we can suppress that
            >>> base = pp.Forward().suppress_warning(
            ...     pp.Diagnostics.warn_on_parse_using_empty_Forward)

            >>> grammar = base | label
            >>> print(grammar.parse_string("x"))
            ['x']
        """
        self.suppress_warnings_.append(warning_type)
        return self
554 def visit_all(self):
555 """General-purpose method to yield all expressions and sub-expressions
556 in a grammar. Typically just for internal use.
557 """
558 to_visit = deque([self])
559 seen = set()
560 while to_visit:
561 cur = to_visit.popleft()
563 # guard against looping forever through recursive grammars
564 if cur in seen:
565 continue
566 seen.add(cur)
568 to_visit.extend(cur.recurse())
569 yield cur
571 def copy(self) -> ParserElement:
572 """
573 Make a copy of this :class:`ParserElement`. Useful for defining
574 different parse actions for the same parsing pattern, using copies of
575 the original parse element.
577 Example:
579 .. testcode::
581 integer = Word(nums).set_parse_action(
582 lambda toks: int(toks[0]))
583 integerK = integer.copy().add_parse_action(
584 lambda toks: toks[0] * 1024) + Suppress("K")
585 integerM = integer.copy().add_parse_action(
586 lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
588 print(
589 (integerK | integerM | integer)[1, ...].parse_string(
590 "5K 100 640K 256M")
591 )
593 prints:
595 .. testoutput::
597 [5120, 100, 655360, 268435456]
599 Equivalent form of ``expr.copy()`` is just ``expr()``:
601 .. testcode::
603 integerM = integer().add_parse_action(
604 lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
605 """
606 cpy = copy.copy(self)
607 cpy.parseAction = self.parseAction[:]
608 cpy.ignoreExprs = self.ignoreExprs[:]
609 if self.copyDefaultWhiteChars:
610 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
611 return cpy
613 def set_results_name(
614 self, name: str, list_all_matches: bool = False, **kwargs
615 ) -> ParserElement:
616 """
617 Define name for referencing matching tokens as a nested attribute
618 of the returned parse results.
620 Normally, results names are assigned as you would assign keys in a dict:
621 any existing value is overwritten by later values. If it is necessary to
622 keep all values captured for a particular results name, call ``set_results_name``
623 with ``list_all_matches`` = True.
625 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
626 this is so that the client can define a basic element, such as an
627 integer, and reference it in multiple places with different names.
629 You can also set results names using the abbreviated syntax,
630 ``expr("name")`` in place of ``expr.set_results_name("name")``
631 - see :meth:`__call__`. If ``list_all_matches`` is required, use
632 ``expr("name*")``.
634 Example:
636 .. testcode::
638 integer = Word(nums)
639 date_str = (integer.set_results_name("year") + '/'
640 + integer.set_results_name("month") + '/'
641 + integer.set_results_name("day"))
643 # equivalent form:
644 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
645 """
646 listAllMatches: bool = deprecate_argument(kwargs, "listAllMatches", False)
648 list_all_matches = listAllMatches or list_all_matches
649 return self._setResultsName(name, list_all_matches)
651 def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
652 if name is None:
653 return self
654 newself = self.copy()
655 if name.endswith("*"):
656 name = name[:-1]
657 list_all_matches = True
658 newself.resultsName = name
659 newself.modalResults = not list_all_matches
660 return newself
662 def set_break(self, break_flag: bool = True) -> ParserElement:
663 """
664 Method to invoke the Python pdb debugger when this element is
665 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
666 disable.
667 """
668 if break_flag:
669 _parseMethod = self._parse
671 def breaker(instring, loc, do_actions=True, callPreParse=True):
672 # this call to breakpoint() is intentional, not a checkin error
673 breakpoint()
674 return _parseMethod(instring, loc, do_actions, callPreParse)
676 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined]
677 self._parse = breaker # type: ignore [method-assign]
678 elif hasattr(self._parse, "_originalParseMethod"):
679 self._parse = self._parse._originalParseMethod # type: ignore [method-assign]
680 return self
682 def set_parse_action(
683 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
684 ) -> ParserElement:
685 """
686 Define one or more actions to perform when successfully matching parse element definition.
688 Parse actions can be called to perform data conversions, do extra validation,
689 update external data structures, or enhance or replace the parsed tokens.
690 Each parse action ``fn`` is a callable method with 0-3 arguments, called as
691 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
693 - ``s`` = the original string being parsed (see note below)
694 - ``loc`` = the location of the matching substring
695 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object
697 The parsed tokens are passed to the parse action as ParseResults. They can be
698 modified in place using list-style append, extend, and pop operations to update
699 the parsed list elements; and with dictionary-style item set and del operations
700 to add, update, or remove any named results. If the tokens are modified in place,
701 it is not necessary to return them with a return statement.
703 Parse actions can also completely replace the given tokens, with another ``ParseResults``
704 object, or with some entirely different object (common for parse actions that perform data
705 conversions). A convenient way to build a new parse result is to define the values
706 using a dict, and then create the return value using :class:`ParseResults.from_dict`.
708 If None is passed as the ``fn`` parse action, all previously added parse actions for this
709 expression are cleared.
711 Optional keyword arguments:
713 :param call_during_try: (default= ``False``) indicate if parse action
714 should be run during lookaheads and alternate
715 testing. For parse actions that have side
716 effects, it is important to only call the parse
717 action once it is determined that it is being
718 called as part of a successful parse.
719 For parse actions that perform additional
720 validation, then ``call_during_try`` should
721 be passed as True, so that the validation code
722 is included in the preliminary "try" parses.
724 .. Note::
725 The default parsing behavior is to expand tabs in the input string
726 before starting the parsing process.
727 See :meth:`parse_string` for more information on parsing strings
728 containing ``<TAB>`` s, and suggested methods to maintain a
729 consistent view of the parsed string, the parse location, and
730 line and column positions within the parsed string.
732 Example: Parse dates in the form ``YYYY/MM/DD``
733 -----------------------------------------------
735 Setup code:
737 .. testcode::
739 def convert_to_int(toks):
740 '''a parse action to convert toks from str to int
741 at parse time'''
742 return int(toks[0])
744 def is_valid_date(instring, loc, toks):
745 '''a parse action to verify that the date is a valid date'''
746 from datetime import date
747 year, month, day = toks[::2]
748 try:
749 date(year, month, day)
750 except ValueError:
751 raise ParseException(instring, loc, "invalid date given")
753 integer = Word(nums)
754 date_str = integer + '/' + integer + '/' + integer
756 # add parse actions
757 integer.set_parse_action(convert_to_int)
758 date_str.set_parse_action(is_valid_date)
760 Successful parse - note that integer fields are converted to ints:
762 .. testcode::
764 print(date_str.parse_string("1999/12/31"))
766 prints:
768 .. testoutput::
770 [1999, '/', 12, '/', 31]
772 Failure - invalid date:
774 .. testcode::
776 date_str.parse_string("1999/13/31")
778 prints:
780 .. testoutput::
782 Traceback (most recent call last):
783 ParseException: invalid date given, found '1999' ...
784 """
785 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)
787 if list(fns) == [None]:
788 self.parseAction.clear()
789 return self
791 if not all(callable(fn) for fn in fns):
792 raise TypeError("parse actions must be callable")
793 self.parseAction[:] = [_trim_arity(fn) for fn in fns]
794 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry
796 return self
798 def add_parse_action(
799 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
800 ) -> ParserElement:
801 """
802 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.
804 See examples in :class:`copy`.
805 """
806 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)
808 self.parseAction += [_trim_arity(fn) for fn in fns]
809 self.callDuringTry = self.callDuringTry or callDuringTry or call_during_try
810 return self
812 def add_condition(
813 self, *fns: ParseCondition, call_during_try: bool = False, **kwargs: Any
814 ) -> ParserElement:
815 """Add a boolean predicate function to expression's list of parse actions. See
816 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
817 functions passed to ``add_condition`` need to return boolean success/fail of the condition.
819 Optional keyword arguments:
821 - ``message`` = define a custom message to be used in the raised exception
822 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
823 ParseException
824 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
825 default=False
827 Example:
829 .. doctest::
830 :options: +NORMALIZE_WHITESPACE
832 >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
833 >>> year_int = integer.copy().add_condition(
834 ... lambda toks: toks[0] >= 2000,
835 ... message="Only support years 2000 and later")
836 >>> date_str = year_int + '/' + integer + '/' + integer
838 >>> result = date_str.parse_string("1999/12/31")
839 Traceback (most recent call last):
840 ParseException: Only support years 2000 and later...
841 """
842 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)
844 for fn in fns:
845 self.parseAction.append(
846 condition_as_parse_action(
847 fn,
848 message=str(kwargs.get("message")),
849 fatal=bool(kwargs.get("fatal", False)),
850 )
851 )
853 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry
854 return self
    def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
        """
        Define action to perform if parsing fails at this expression.
        Fail action fn is a callable function that takes the arguments
        ``fn(s, loc, expr, err)`` where:

        - ``s`` = string being parsed
        - ``loc`` = location where expression match was attempted and failed
        - ``expr`` = the parse expression that failed
        - ``err`` = the exception thrown

        The function returns no value. It may throw :class:`ParseFatalException`
        if it is desired to stop parsing immediately.

        :return: this element itself, so that calls can be chained
        """
        self.failAction = fn
        return self
872 def _skipIgnorables(self, instring: str, loc: int) -> int:
873 if not self.ignoreExprs:
874 return loc
875 exprsFound = True
876 ignore_expr_fns = [e._parse for e in self.ignoreExprs]
877 last_loc = loc
878 while exprsFound:
879 exprsFound = False
880 for ignore_fn in ignore_expr_fns:
881 try:
882 while 1:
883 loc, dummy = ignore_fn(instring, loc)
884 exprsFound = True
885 except ParseException:
886 pass
887 # check if all ignore exprs matched but didn't actually advance the parse location
888 if loc == last_loc:
889 break
890 last_loc = loc
891 return loc
893 def preParse(self, instring: str, loc: int) -> int:
894 if self.ignoreExprs:
895 loc = self._skipIgnorables(instring, loc)
897 if self.skipWhitespace:
898 instrlen = len(instring)
899 white_chars = self.whiteChars
900 while loc < instrlen and instring[loc] in white_chars:
901 loc += 1
903 return loc
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Base matching step: consumes nothing, returning ``loc`` unchanged
        together with an empty token list.

        Called by ``_parseNoCache``, which expects a ``(new location, tokens)``
        pair (presumably subclasses override this with their real matching logic).
        """
        return loc, []
    def postParse(self, instring, loc, tokenlist):
        """
        Hook applied by ``_parseNoCache`` to the tokens returned by
        ``parseImpl``, before they are packaged as :class:`ParseResults`.
        The base implementation returns ``tokenlist`` unchanged.
        """
        return tokenlist
911 # @profile
    def _parseNoCache(
        self, instring, loc, do_actions=True, callPreParse=True
    ) -> tuple[int, ParseResults]:
        """
        Match this element at ``loc`` and return ``(new location, results)``.

        Steps: optionally run ``preParse``; call ``parseImpl``; pass the tokens
        through ``postParse``; package them as :class:`ParseResults`; then run
        the parse actions (only if ``do_actions`` or ``callDuringTry`` is set).
        A parse action that returns something other than ``None`` or the
        results object it was given replaces the results.

        When ``self.debug`` is set or a fail action is defined, a slower path
        is taken that also invokes the debug callbacks and the fail action.

        :param instring: the string being parsed
        :param loc: location at which to attempt the match
        :param do_actions: whether parse actions should be run
        :param callPreParse: whether ``preParse`` should be run first
          (it is also skipped if ``self.callPreparse`` is false)
        :raises ParseException: if ``parseImpl`` raises IndexError, or a parse
          action raises IndexError; other exceptions from ``parseImpl`` and
          the parse actions propagate unchanged
        """
        debugging = self.debug  # and do_actions)
        len_instring = len(instring)

        if debugging or self.failAction:
            # slow path: same matching logic as the else-branch below, wrapped
            # so that the debug and fail callbacks see any exception
            # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
            try:
                if callPreParse and self.callPreparse:
                    pre_loc = self.preParse(instring, loc)
                else:
                    pre_loc = loc
                tokens_start = pre_loc
                if self.debugActions.debug_try:
                    self.debugActions.debug_try(instring, tokens_start, self, False)
                if self.mayIndexError or pre_loc >= len_instring:
                    try:
                        loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                    except IndexError:
                        # running off the end of the input is reported as a parse failure
                        raise ParseException(instring, len_instring, self.errmsg, self)
                else:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except Exception as err:
                # NOTE(review): if preParse itself raised, tokens_start has not been
                # assigned yet, and the uses below would raise UnboundLocalError - confirm.
                # print("Exception raised:", err)
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                if self.failAction:
                    self.failAction(instring, tokens_start, self, err)
                raise
        else:
            # fast path: no debug or fail callbacks to notify
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

        tokens = self.postParse(instring, loc, tokens)

        ret_tokens = ParseResults(
            tokens, self.resultsName, aslist=self.saveAsList, modal=self.modalResults
        )
        if self.parseAction and (do_actions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        try:
                            tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                        except IndexError as parse_action_exc:
                            # NOTE(review): elsewhere in this method ParseException is
                            # called as (instring, loc, msg, elem); here the message is
                            # the first positional argument - confirm this is intended.
                            exc = ParseException("exception raised in parse action")
                            raise exc from parse_action_exc

                        # a parse action may replace the results by returning a new value
                        if tokens is not None and tokens is not ret_tokens:
                            ret_tokens = ParseResults(
                                tokens,
                                self.resultsName,
                                aslist=self.saveAsList
                                and isinstance(tokens, (ParseResults, list)),
                                modal=self.modalResults,
                            )
                except Exception as err:
                    # print "Exception raised in user parse action:", err
                    if self.debugActions.debug_fail:
                        self.debugActions.debug_fail(
                            instring, tokens_start, self, err, False
                        )
                    raise
            else:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action may replace the results by returning a new value
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            aslist=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
        if debugging:
            # print("Matched", self, "->", ret_tokens.as_list())
            if self.debugActions.debug_match:
                self.debugActions.debug_match(
                    instring, tokens_start, loc, self, ret_tokens, False
                )

        return loc, ret_tokens
def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a parse at ``loc`` and return only the resulting end location.

    :param instring: the string being parsed
    :param loc: location at which to attempt the parse
    :param raise_fatal: if true, a :class:`ParseFatalException` is re-raised
        as is; otherwise it is replaced by a plain :class:`ParseException`
    :param do_actions: forwarded to ``self._parse``
    :returns: the first element of the tuple returned by ``self._parse``
    """
    try:
        parsed = self._parse(instring, loc, do_actions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            # downgrade the fatal error to an ordinary parse failure
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return parsed[0]
def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    :returns: ``False`` if :meth:`try_parse` raises :class:`ParseException`
        or ``IndexError``, ``True`` otherwise
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True
# cache for left-recursion in Forward references
# recursion_lock is a re-entrant lock; its users are outside this section
# (presumably it guards recursion_memos - TODO confirm).
recursion_lock = RLock()
# Starts out as a plain dict; enable_left_recursion() replaces it with a memo
# object, and reset_cache() clears it.
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}
class _CacheType(typing.Protocol):
    """
    Structural interface for the objects used for packrat and left-recursion
    caching of results and exceptions.
    """

    # sentinel that get() is compared against (by identity) to detect a miss
    not_in_cache: bool

    # look up a cached value
    def get(self, *args) -> typing.Any: ...

    # store a value in the cache
    def set(self, *args) -> None: ...

    # discard all cached values
    def clear(self) -> None: ...
class NullCache(dict):
    """
    A null cache type for initialization of the packrat_cache class variable.
    If/when enable_packrat() is called, this null cache will be replaced by a
    proper _CacheType class instance.

    Nothing is ever stored: ``set`` and ``clear`` are no-ops, and ``get``
    always reports a miss.
    """

    # sentinel compared by identity against the result of get()
    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        # Return the miss sentinel rather than None, so that a caller doing
        # ``value is cache.not_in_cache`` sees a miss instead of mistaking
        # None for a cached value.
        return self.not_in_cache

    def set(self, *args) -> None: ...

    def clear(self) -> None: ...
# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
# Placeholder cache; enable_packrat() replaces it with a real cache object.
packrat_cache: _CacheType = NullCache()
# Lock held by _parseCache and by the enable/disable/reset cache helpers.
packrat_cache_lock = RLock()
# [hits, misses] counters, indexed with HIT=0 / MISS=1 in _parseCache.
packrat_cache_stats = [0, 0]
1077 # this method gets repeatedly called during backtracking with the same arguments -
1078 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Memoizing wrapper around :meth:`_parseNoCache` (packrat parsing).

    Results and :class:`ParseBaseException` failures are stored in
    ``ParserElement.packrat_cache`` under the key
    ``(self, instring, loc, callPreParse, do_actions)``. On a hit the cached
    exception is re-raised, or a copy of the cached results is returned.
    Debug actions, if enabled, are called with ``cache_hit=True``; a
    ``TypeError`` from a debug action that does not accept that keyword is
    ignored.

    :returns: tuple of the location following the match and the results
    :raises ParseBaseException: on a failed (or cached failed) parse
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # stored as (end location, results copy, start location)
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(tuple[int, ParseResults, int], value)
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # Pass start before end, matching the argument order that
                    # _parseNoCache uses for debug_match.
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result
# Default parse entry point: the uncached implementation. enable_packrat()
# rebinds this to _parseCache and disable_memoization() restores it.
_parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters, and clear the
    left-recursion memo table.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        stats = ParserElement.packrat_cache_stats
        # zero in place so existing references to the list stay valid
        stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()
# class attributes to keep caching status
# Set by enable_packrat() / enable_left_recursion() and cleared again by
# disable_memoization(); the two modes refuse to be enabled together.
_packratEnabled = False
_left_recursion_enabled = False
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing and clear their memoization.

    Calling this when neither mode is active is harmless, so it can be used
    before enabling Packrat or Left Recursion to clear any previous settings.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        cls.reset_cache()
        cls._left_recursion_enabled = False
        cls._packratEnabled = False
        # route all parsing back through the uncached implementation
        cls._parse = cls._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Enables "bounded recursion" parsing, which allows for both direct and indirect
    left-recursion. During parsing, left-recursive :class:`Forward` elements are
    repeatedly matched with a fixed recursion depth that is gradually increased
    until finding the longest match.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    Recursion search naturally memoizes matches of ``Forward`` elements and may
    thus skip reevaluation of parse actions during backtracking. This may break
    programs with parse actions which rely on strict ordering of side-effects.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements. A value that is not greater than
      zero raises ``NotImplementedError``.

    Bounded Recursion parsing works similar but not identical to Packrat parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings; otherwise ``RuntimeError`` is raised if
    Packrat is already enabled.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo = _UnboundedMemo()
        elif not cache_size_limit > 0:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")
        else:
            memo = _LRUMemo(capacity=cache_size_limit)

        ParserElement.recursion_memos = memo  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code. Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects. For this reason, packrat parsing is disabled when
    you first import pyparsing. To activate the packrat feature, your
    program must call the class method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similar but not identical to Bounded Recursion parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings; otherwise ``RuntimeError`` is raised if
    Bounded Recursion is already enabled.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        if force:
            cls.disable_memoization()
        elif cls._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        # already active: keep the existing cache untouched
        if cls._packratEnabled:
            return

        cls._packratEnabled = True
        cls.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        cls._parse = cls._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, **kwargs
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This function is intended as the primary interface to the
    client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This
    is also equivalent to ending the grammar with :class:`StringEnd`\\ ().

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    must_match_all = parse_all or legacy_parse_all

    # start every top-level parse with empty caches
    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, results = self._parse(instring, 0)
        if must_match_all:
            # whatever follows the match must be the end of the string
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
    return results
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    **kwargs,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    If ``debug`` is true, each match is also printed as a dict with
    ``tokens``, ``start`` and ``end`` keys before it is yielded.

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    legacy_max: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    max_matches = min(legacy_max, max_matches)

    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0

    if always_skip_whitespace:
        # stand-in element that only skips this element's whitespace and ignorables
        skipper = Empty()
        skipper.ignoreExprs = self.ignoreExprs
        skipper.whiteChars = self.whiteChars
        preparseFn = skipper.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse

    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < max_matches:
            try:
                preloc = preparseFn(instring, loc)
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character further on
                loc = preloc + 1
                continue

            if not nextLoc > loc:
                # the match did not move past loc; step on to avoid looping forever
                loc = preloc + 1
                continue

            matches += 1
            if debug:
                print(
                    {
                        "tokens": tokens.as_list(),
                        "start": preloc,
                        "end": nextLoc,
                    }
                )
            yield tokens, preloc, nextLoc

            if not overlap:
                loc = nextLoc
                continue

            # overlap mode: jump to the end of the match only when
            # preparsing at loc skips something, else advance by one
            skipped_to = preparseFn(instring, loc)
            if skipped_to > loc:
                loc = nextLoc
            else:
                loc += 1
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string`, to modify matching text with modified tokens that may
    be returned from a parse action. To use ``transform_string``, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking ``transform_string()`` on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action. ``transform_string()`` returns the resulting transformed string.

    Note that calling this method sets ``keepTabs`` on this element.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
        made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
        Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy through the unmatched text before this match
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces.extend(toks.as_list())
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[prev_end:])
        kept = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(kept))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, simplifying the access to the tokens found
    to match the given parse expression. May be called with optional
    ``max_matches`` argument, to clip searching after 'n' matches are found.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    legacy_max: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    max_matches = min(legacy_max, max_matches)
    try:
        scanner = self.scan_string(
            instring,
            max_matches=max_matches,
            always_skip_whitespace=False,
            debug=debug,
        )
        # keep only the tokens of each (tokens, start, end) triple
        return ParseResults([match[0] for match in scanner])
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``include_separators`` argument (default= ``False``), if the separating
    matching text should be included in the split results.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    legacy_flag: bool = deprecate_argument(kwargs, "includeSeparators", False)
    include_separators = legacy_flag or include_separators

    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        # text between the previous separator and this one
        yield instring[cursor:start]
        if include_separators:
            yield toks[0]
        cursor = end
    # whatever follows the last separator
    yield instring[cursor:]
def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement`
    converts them to :class:`Literal`\\ s by default.

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is ...:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented
def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    A leading ``...`` becomes a :class:`SkipTo` this element, named ``"_skipped*"``.
    """
    if other is ...:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented
def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    Strings are converted with ``self._literalStringClass``; any other
    non-:class:`ParserElement` operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented
def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented
1683 def __mul__(self, other) -> ParserElement:
1684 """
1685 Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
1686 ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
1687 tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
1688 may also include ``None`` as in:
1690 - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
1691 to ``expr*n + ZeroOrMore(expr)``
1692 (read as "at least n instances of ``expr``")
1693 - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
1694 (read as "0 to n instances of ``expr``")
1695 - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
1696 - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``
1698 Note that ``expr*(None, n)`` does not raise an exception if
1699 more than n exprs exist in the input stream; that is,
1700 ``expr*(None, n)`` does not enforce a maximum number of expr
1701 occurrences. If this behavior is desired, then write
1702 ``expr*(None, n) + ~expr``
1703 """
1704 if other is Ellipsis:
1705 other = (0, None)
1706 elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
1707 other = ((0,) + other[1:] + (None,))[:2]
1709 if not isinstance(other, (int, tuple)):
1710 return NotImplemented
1712 if isinstance(other, int):
1713 minElements, optElements = other, 0
1714 else:
1715 other = tuple(o if o is not Ellipsis else None for o in other)
1716 other = (other + (None, None))[:2]
1717 if other[0] is None:
1718 other = (0, other[1])
1719 if isinstance(other[0], int) and other[1] is None:
1720 if other[0] == 0:
1721 return ZeroOrMore(self)
1722 if other[0] == 1:
1723 return OneOrMore(self)
1724 else:
1725 return self * other[0] + ZeroOrMore(self)
1726 elif isinstance(other[0], int) and isinstance(other[1], int):
1727 minElements, optElements = other
1728 optElements -= minElements
1729 else:
1730 return NotImplemented
1732 if minElements < 0:
1733 raise ValueError("cannot multiply ParserElement by negative value")
1734 if optElements < 0:
1735 raise ValueError(
1736 "second tuple value must be greater or equal to first tuple value"
1737 )
1738 if minElements == optElements == 0:
1739 return And([])
1741 if optElements:
1743 def makeOptionalList(n):
1744 if n > 1:
1745 return Opt(self + makeOptionalList(n - 1))
1746 else:
1747 return Opt(self)
1749 if minElements:
1750 if minElements == 1:
1751 ret = self + makeOptionalList(optElements)
1752 else:
1753 ret = And([self] * minElements) + makeOptionalList(optElements)
1754 else:
1755 ret = makeOptionalList(optElements)
1756 else:
1757 if minElements == 1:
1758 ret = self
1759 else:
1760 ret = And([self] * minElements)
1761 return ret
1763 def __rmul__(self, other) -> ParserElement:
1764 return self.__mul__(other)
def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is ...:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented
def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented
def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented
def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented
def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented
def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented
def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - wraps this element in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated
# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence
# (with __iter__ set to None, iter(expr) raises TypeError instead of falling
# back to calling __getitem__ with 0, 1, 2, ...)
__iter__ = None
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream. If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    .. versionchanged:: 3.1.0
       Support for slice notation.

    :raises TypeError: if more than two index arguments are given
    """
    has_stop = False
    stop_on = NoMatch()

    # a slice carries the repetition count(s) in .start and the stop_on
    # expression in .stop
    if isinstance(key, slice):
        stop_on = key.stop
        key = ... if key.start is None else key.start
        has_stop = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        trailing = key[1]
        key, stop_on = (key[0], trailing.start), trailing.stop
        has_stop = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop:
        repeated.stopOn(stop_on)

    return repeated
1907 def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
1908 """
1909 Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.
1911 If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
1912 passed as ``True``.
1914 If ``name`` is omitted, same as calling :class:`copy`.
1916 Example:
1918 .. testcode::
1920 # these are equivalent
1921 userdata = (
1922 Word(alphas).set_results_name("name")
1923 + Word(nums + "-").set_results_name("socsecno")
1924 )
1926 userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
1927 """
1928 if name is not None:
1929 return self._setResultsName(name)
1931 return self.copy()
def suppress(self) -> ParserElement:
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that its
    output is suppressed; useful to keep punctuation from cluttering up
    returned output.
    """
    suppressed = Suppress(self)
    return suppressed
def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
        this base implementation only sets the flag on ``self``
    :returns: ``self``, to allow call chaining
    """
    self.skipWhitespace = True
    return self
def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern. This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any);
        this base implementation only clears the flag on ``self``
    :returns: ``self``, to allow call chaining
    """
    self.skipWhitespace = False
    return self
def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Replace the set of characters this element treats as whitespace.

    Whitespace skipping is switched on as a side effect.

    :param chars: the whitespace characters, as a string or a set of characters
    :param copy_defaults: stored as ``copyDefaultWhiteChars``
    :returns: ``self``, to allow call chaining
    """
    self.skipWhitespace = True
    self.whiteChars = set(chars)
    self.copyDefaultWhiteChars = copy_defaults
    return self
def parse_with_tabs(self) -> ParserElement:
    """
    Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    :returns: ``self``, so calls can be chained
    """
    # record that tabs are to be kept as-is rather than expanded
    self.keepTabs = True
    return self
def ignore(self, other: ParserElement) -> ParserElement:
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in :class:`Suppress`. An expression that is
    already a :class:`Suppress` is added only if it is not already registered;
    any other expression is copied and wrapped in a :class:`Suppress`.

    Example:

    .. doctest::

       >>> patt = Word(alphas)[...]
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj']

       >>> patt = Word(alphas)[...].ignore(c_style_comment)
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        # wrap a private copy so the caller's expression is left untouched
        self.ignoreExprs.append(Suppress(other.copy()))
        return self

    already_registered = other in self.ignoreExprs
    if not already_registered:
        self.ignoreExprs.append(other)
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching.
    Any action given as a falsy value is replaced by the module's default action.

    :param start_action: method to be called when an expression is about to be parsed;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
       should have the signature::

          fn(input_string: str,
             start_location: int,
             end_location: int,
             expression: ParserElement,
             parsed_tokens: ParseResults,
             cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             exception: Exception,
             cache_hit: bool)

    :returns: ``self``, so calls can be chained
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]

    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    # debugging is on as soon as at least one action is set
    self.debug = any(self.debugActions)
    return self
def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions.

    Example:

    .. testcode::

       wd = Word(alphas).set_name("alphaword")
       integer = Word(nums).set_name("numword")
       term = wd | integer

       # turn on debugging for wd
       wd.set_debug()

       term[1, ...].parse_string("abc 123 xyz 890")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE

       Match alphaword at loc 0(1,1)
         abc 123 xyz 890
         ^
       Matched alphaword -> ['abc']
       Match alphaword at loc 4(1,5)
         abc 123 xyz 890
             ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
       Match alphaword at loc 8(1,9)
         abc 123 xyz 890
                 ^
       Matched alphaword -> ['xyz']
       Match alphaword at loc 12(1,13)
         abc 123 xyz 890
                     ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
         abc 123 xyz 890
                        ^
       Match alphaword failed, ParseException raised: Expected alphaword, found end of text ...

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :meth:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :meth:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # apply the flag to every expression yielded by visit_all(), one level at a time
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self
@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression; computed on first access via
    ``_generateDefaultName()`` and cached in ``_defaultName``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    :returns: the string used as this expression's ``default_name``
    """
def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Define name for this expression, makes debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example:

    .. doctest::

       >>> integer = Word(nums)
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

       >>> integer.set_name("integer")
       integer
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # rebuild the error message from the (possibly new) display name
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        has_custom_name = name is not None
        self.set_debug(has_custom_name)

    return self
@property
def name(self) -> str:
    """
    Returns a user-defined name if available, but otherwise defaults back to the auto-generated name
    """
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

@name.setter
def name(self, new_name) -> None:
    # assigning to ``name`` is routed through set_name()
    self.set_name(new_name)
def __str__(self) -> str:
    """Return the expression's ``name`` (custom name if set, else the default name)."""
    return self.name
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)
def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default name
    so that it is regenerated on next access. Returns ``self``.
    """
    self.streamlined = True
    self._defaultName = None
    return self
def recurse(self) -> list[ParserElement]:
    """Return the list of sub-expressions; this implementation has none and returns ``[]``."""
    return []
def _checkRecursion(self, parseElementList):
    """
    Propagate the recursion check to every expression returned by ``recurse()``,
    passing a copy of ``parseElementList`` extended with ``self``; the caller's
    list is not modified.
    """
    trail = parseElementList[:] + [self]
    for sub_expr in self.recurse():
        sub_expr._checkRecursion(trail)
def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.
    Always emits a :class:`PyparsingDeprecationWarning` before running the check.
    ``validateTrace`` is accepted but not used.
    """
    deprecation_message = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_message, PyparsingDeprecationWarning, stacklevel=2)
    self._checkRecursion([])
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    :param file_or_filename: an object with a ``read()`` method, or a path to open
    :param encoding: encoding used when opening a path (ignored for file objects)
    :param parse_all: flag passed to :meth:`parse_string`; the legacy ``parseAll``
        keyword is also accepted and OR-ed with it
    :returns: the :class:`ParseResults` from :meth:`parse_string`
    :raises ParseBaseException: if parsing fails; unless
        ``ParserElement.verbose_stacktrace`` is set, the traceback is cleared first
    """
    parseAll: bool = deprecate_argument(kwargs, "parseAll", False)

    parse_all = parse_all or parseAll

    # Only the attribute lookup is guarded: an AttributeError raised from inside
    # a file object's own read() must propagate rather than being mistaken for
    # "this is a filename".
    try:
        read = typing.cast(TextIO, file_or_filename).read
    except AttributeError:
        filename = typing.cast(Union[str, Path], file_or_filename)
        with open(filename, "r", encoding=encoding) as f:
            file_contents = f.read()
    else:
        file_contents = read()

    try:
        return self.parse_string(file_contents, parse_all)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object compares equal;
    - a string compares equal if this expression matches it completely;
    - another :class:`ParserElement` compares equal if both have equal ``vars()``;
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)
def matches(self, test_string: str, parse_all: bool = True, **kwargs) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match
    :param parse_all: flag to pass to :meth:`parse_string` when running tests;
        the legacy ``parseAll`` keyword is also accepted and AND-ed with it
    :returns: ``True`` if parsing succeeds, ``False`` if a
        :class:`ParseBaseException` is raised

    Example:

    .. doctest::

       >>> expr = Word(nums)
       >>> expr.matches("100")
       True
    """
    parseAll: bool = deprecate_argument(kwargs, "parseAll", True)

    parse_all = parse_all and parseAll
    try:
        self.parse_string(str(test_string), parse_all=parse_all)
    except ParseBaseException:
        return False
    else:
        return True
def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are legacy spellings that are combined with their
    snake_case counterparts.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` pairs, where ``result`` is either the :class:`ParseResults`
    or the exception raised for that test

    Passing example:

    .. testcode::

       number_expr = pyparsing_common.number.copy()

       result = number_expr.run_tests('''
           # unsigned integer
           100
           # negative integer
           -100
           # float with scientific notation
           6.02e23
           # integer with scientific notation
           1e-12
           # negative decimal number without leading digit
           -.100
           ''')
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # unsigned integer
       100
       [100]

       # negative integer
       -100
       [-100]

       # float with scientific notation
       6.02e23
       [6.02e+23]

       # integer with scientific notation
       1e-12
       [1e-12]

       # negative decimal number without leading digit
       -.100
       [-0.1]
       Success

    Failure-test example:

    .. testcode::

       result = number_expr.run_tests('''
           # stray character
           100Z
           # too many '.'
           3.14.159
           ''', failure_tests=True)
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # stray character
       100Z
       100Z
          ^
       ParseException: Expected end of text, found 'Z' ...

       # too many '.'
       3.14.159
       3.14.159
           ^
       ParseException: Expected end of text, found '.' ...
       FAIL: Expected end of text, found '.' ...
       Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

       expr = Word(alphanums)[1,...]
       expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE
       :hide:


       this is a test\\n of strings that spans \\n 3 lines
       ['this', 'is', 'a', 'test', 'of', 'strings', 'that', 'spans', '3', 'lines']

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # fold each legacy camelCase keyword into its snake_case counterpart; from
    # here on only the merged (camelCase-named) values are used
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # converts a literal backslash-n in a test line into a real newline,
    # leaving quoted strings alone
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines, and blank lines that follow a comment, are buffered
        # and emitted as the heading of the next real test
        if (comment_specified and comment.matches(t, False)) or (comments and not t):
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            # use the merged flag so that the legacy ``parseAll=False`` is honored
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    target_is_path = isinstance(output_html, (str, Path))
    if target_is_path:
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    # we were passed a file-like object, just write to it
    output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))
# Compatibility synonyms
# Each legacy camelCase name below is bound to the result of
# replaced_by_pep8(<legacy name>, <snake_case method>); the first group is
# additionally wrapped in staticmethod().
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# plain alias: the same property object under its legacy name
defaultName = default_name
# fmt: on
class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render the anchor followed by an Empty, then show that Empty as '...'
        placeholder_text = str(self.anchor + Empty())
        return placeholder_text.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # build the SkipTo that replaces this placeholder; skipped text is
        # collected under the "_skipped" results name
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the entry
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the anchor itself was absent: report it in place of the skipped text
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder must have been replaced by __add__ before parsing
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # default name is the name of the concrete class
        return type(self).__name__
class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self) -> None:
        super().__init__()
        self._may_return_empty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditionally fail at the current location
        raise ParseException(instring, loc, self.errmsg, self)
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example:

    .. doctest::

       >>> Literal('abc').parse_string('abc')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('abcdef')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('ab')
       Traceback (most recent call last):
       ParseException: Expected 'abc', found 'ab' (at char 0), (line: 1, col: 1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", **kwargs):
        # Subclasses get the default behavior
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")

        match_string = legacy_match or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)

        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", **kwargs) -> None:
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")

        super().__init__()
        literal_text = legacy_match or match_string
        self.match = literal_text
        self.matchLen = len(literal_text)
        self.firstMatchChar = literal_text[:1]
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # cheap first-character test before the full startswith comparison
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # both arguments are accepted but not used; the match string is always ""
        super().__init__("")
        self._may_return_empty = True
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # consume no input and contribute no tokens
        return loc, []
class _SingleCharLiteral(Literal):
    # Literal variant selected by Literal.__new__ for one-character match strings
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # a single character comparison is the whole match
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Install Literal as ParserElement's ``_literalStringClass``.
# NOTE(review): presumably the class used to convert bare strings into
# expressions -- confirm against ParserElement.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("start")
       ParseResults(['start'], {})
       >>> Keyword("start").parse_string("starting")
       Traceback (most recent call last):
       ParseException: Expected Keyword 'start', keyword was immediately
       followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("starting").debug()
       Traceback (most recent call last):
       ParseException: Expected Keyword "start", keyword was immediately
       followed by keyword character, found 'ing' ...

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        **kwargs,
    ) -> None:
        matchString = deprecate_argument(kwargs, "matchString", "")
        identChars = deprecate_argument(kwargs, "identChars", None)

        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done in upper case, so store the
            # keyword and the identifier characters upper-cased
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.ident_chars = set(identChars)

    @property
    def identChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
           use ident_chars instead.

        Property returning the characters being used as keyword characters for this expression.
        """
        return self.ident_chars

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that it is neither preceded nor
        followed by a keyword character.

        :returns: ``(loc + len(keyword), keyword)`` on success
        :raises ParseException: if the keyword text is not at ``loc``, or if it
            is adjacent to a keyword character (the message and location then
            point at the offending neighbor)
        """
        errmsg = self.errmsg or ""
        errloc = loc
        # read the attribute directly rather than through the deprecated
        # ``identChars`` property
        ident_chars = self.ident_chars
        match_len = self.matchLen
        if self.caseless:
            if instring[loc : loc + match_len].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in ident_chars:
                    if (
                        loc >= len(instring) - match_len
                        or instring[loc + match_len].upper() not in ident_chars
                    ):
                        return loc + match_len, self.match

                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + match_len
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and match_len == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in ident_chars:
                if (
                    loc >= len(instring) - match_len
                    or instring[loc + match_len] not in ident_chars
                ):
                    return loc + match_len, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + match_len
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example:

    .. doctest::

       >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", **kwargs) -> None:
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")

        defining_text = legacy_match or match_string
        # the base Literal stores the upper-cased text used for comparisons
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        candidate = instring[loc : loc + self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString
class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example:

    .. doctest::

       >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self, match_string: str = "", ident_chars: typing.Optional[str] = None, **kwargs
    ) -> None:
        # pull out the deprecated camelCase spellings first
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        legacy_ident: typing.Optional[str] = deprecate_argument(
            kwargs, "identChars", None
        )

        # a truthy legacy value takes precedence over the PEP8-named argument
        keyword_chars = legacy_ident if legacy_ident else ident_chars
        keyword_text = legacy_match if legacy_match else match_string
        super().__init__(keyword_text, keyword_chars, caseless=True)
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> patt = CloseMatch("ATCATCGAATGGA")
       >>> patt.parse_string("ATCATCGAAXGGA")
       ParseResults(['ATCATCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

       >>> patt.parse_string("ATCAXCGAAXGGA")
       Traceback (most recent call last):
       ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
       found 'ATCAXCGAAXGGA'  (at char 0), (line:1, col:1)

       # exact match
       >>> patt.parse_string("ATCATCGAATGGA")
       ParseResults(['ATCATCGAATGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': []})

       # close match allowing up to 2 mismatches
       >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
       >>> patt.parse_string("ATCAXCGAAXGGA")
       ParseResults(['ATCAXCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        caseless=False,
        **kwargs,
    ) -> None:
        # deprecated camelCase keyword, which also supplies the default of 1
        legacy_max: int = deprecate_argument(kwargs, "maxMismatches", 1)
        allowed = legacy_max if max_mismatches is None else max_mismatches

        super().__init__()
        self.match_string = match_string
        self.maxMismatches = allowed
        self.caseless = caseless
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        target = self.match_string
        stop = loc + len(target)

        # not enough input left to hold the whole match string
        if stop > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        limit = self.maxMismatches
        mismatches = []
        # NOTE: when match_string is empty the loop body never runs, so idx
        # keeps this initial 0 and a single input character is consumed
        idx = 0
        for idx, (src, mat) in enumerate(zip(instring[loc:stop], target)):
            if self.caseless:
                src, mat = src.lower(), mat.lower()

            if src == mat:
                continue

            mismatches.append(idx)
            if len(mismatches) > limit:
                # too many differences - report failure at the start location
                raise ParseException(instring, loc, self.errmsg, self)

        end = loc + idx + 1
        results = ParseResults([instring[loc:end]])
        results["original"] = target
        results["mismatches"] = mismatches
        return end, results
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0,
      meaning no maximum)
    - ``exact`` - exact number of characters to match (default=0,
      meaning no exact length; when given it overrides ``min`` and ``max``)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

       # a word composed of digits
       integer = Word(nums)
       # Two equivalent alternate forms:
       Word("0123456789")
       Word(srange("[0-9]"))

       # a word with a leading capital, and zero or more lowercase
       capitalized_word = Word(alphas.upper(), alphas.lower())

       # hostnames are alphanumeric, with leading alpha, and '-'
       hostname = Word(alphas, alphanums + '-')

       # roman numeral
       # (not a strict parser, accepts invalid mix of characters)
       roman = Word("IVXLCDM")

       # any string of non-whitespace characters, except for ','
       csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
                        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
       Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        # deprecated camelCase keyword arguments; a truthy legacy value
        # takes precedence over the PEP8-named argument
        initChars: typing.Optional[str] = deprecate_argument(kwargs, "initChars", None)
        bodyChars: typing.Optional[str] = deprecate_argument(kwargs, "bodyChars", None)
        asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        excludeChars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.init_chars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no (remaining) body chars: body shares the initial char set
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both min and max
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        # (self.init_chars is read directly; the deprecated initChars
        # property would build a new set copy on each access)
        if " " not in (self.init_chars | self.bodyChars):
            if len(self.init_chars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.init_chars)}]"

            if self.bodyChars == self.init_chars:
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    # the leading fragment already consumes one character,
                    # so body repetition counts are reduced by one
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    @property
    def initChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
           use `init_chars` instead.

        Property returning the initial chars to be used when matching this
        Word expression. If no body chars were specified, the initial characters
        will also be the body characters.

        Each access returns a new ``set`` copy of ``init_chars``.
        """
        return set(self.init_chars)

    def copy(self) -> Word:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            # rebind the regex fast path to the copy rather than the original
            ret.re_match = self.re_match
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        def charsAsStr(s):
            # abbreviate long character sets to keep names readable
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        init_chars = self.init_chars
        if init_chars != self.bodyChars:
            base = f"W:({charsAsStr(init_chars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(init_chars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Character-by-character matcher, used when no regex could be built.
        # Read the stored set directly: the deprecated ``initChars`` property
        # would build a fresh copy of the set on every parse attempt.
        if instring[loc] not in self.init_chars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            # too short
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # word continues past the specified maximum length
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            # keyword must not be adjacent to other body characters
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Regex-based matcher installed by __init__ when the pattern compiled.
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result[0]
class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)``, for
    matching exactly one character drawn from a given string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        # deprecated camelCase spellings; a truthy legacy value wins
        legacy_keyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        legacy_exclude: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        super().__init__(
            charset,
            exact=1,
            as_keyword=legacy_keyword or as_keyword,
            exclude_chars=legacy_exclude or exclude_chars,
        )
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ module for an
    explanation of the acceptable patterns and flags.

    ``pattern`` may also be a callable taking no arguments and returning a
    string or a compiled RE object; it is called the first time the
    compiled pattern is needed.

    Example:

    .. testcode::

       realnum = Regex(r"[+-]?\d+\.\d*")
       # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
       roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

       # named fields in a regex will be returned as named results
       date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

       # the Regex class will accept regular expressions compiled using the
       # re module
       import re
       parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        **kwargs,
    ) -> None:
        super().__init__()
        # deprecated camelCase keyword arguments
        asGroupList: bool = deprecate_argument(kwargs, "asGroupList", False)
        asMatch: bool = deprecate_argument(kwargs, "asMatch", False)

        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first read
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # already-compiled RE object (duck-typed)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the parseImpl variant; as_match takes precedence when both set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self) -> Regex:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Regex = cast(Regex, super().copy())
        # rebind the instance-level parseImpl override to the copy
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch  # type: ignore [method-assign]
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """
        Property returning the compiled regular expression for this Regex.

        Resolves a deferred (string or callable) pattern on first access and,
        as a side effect, determines whether the pattern can match the empty
        string.

        Generally only used internally by pyparsing.

        :raises ValueError: if the pattern string cannot be compiled
        """
        if not self._re:
            if callable(self.pattern):
                # replace self.pattern with the value returned by calling
                # self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

                # see if we got a compiled RE back instead of a str
                if hasattr(self.pattern, "pattern") and hasattr(
                    self.pattern, "match"
                ):
                    self._re = cast(re.Pattern[str], self.pattern)
                    self.pattern = self.reString = self._re.pattern
                else:
                    # keep reString in step with the resolved pattern string
                    self.reString = self.pattern

            if not self._re:
                try:
                    self._re = re.compile(self.pattern, self.flags)
                except re.error as err:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from err
                # use the freshly compiled pattern directly instead of
                # re-entering this property while it is being computed
                self._may_return_empty = self._re.match("") is not None
                return self._re

        # A compiled RE was supplied (directly or by the callable): make sure
        # the may-return-empty flag is determined here as well, otherwise
        # mayReturnEmpty would report None for such patterns.
        if self._may_return_empty is None:
            self._may_return_empty = self._re.match("") is not None
        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        return self.re.match

    @property
    def mayReturnEmpty(self):
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def _match_at(self, instring, loc):
        # Shared matching step for the parseImpl variants: returns the match
        # object at loc, or raises ParseException if there is none.
        #
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)
        return result

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        result = self._match_at(instring, loc)

        loc = result.end()
        ret = ParseResults(result[0])
        # named groups become named results
        for k, v in result.groupdict().items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        # variant used with as_group_list=True: returns the tuple of groups
        result = self._match_at(instring, loc)
        return result.end(), result.groups()

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        # variant used with as_match=True: returns the match object itself
        result = self._match_at(instring, loc)
        return result.end(), result

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example:

        .. testcode::

           make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
           print(make_html.transform_string("h1:main title:"))

        .. testoutput::

           <h1>main title</h1>

        :raises TypeError: if this Regex was built with ``as_group_list=True``,
                           or with ``as_match=True`` and ``repl`` is callable
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # tokens[0] is the match object; expand the template on it
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
       ``unquote_results`` is ``False``.

    Example:

    .. doctest::

       >>> qs = QuotedString('"')
       >>> print(qs.search_string('lsjdf "This is the quote" sldjf'))
       [['This is the quote']]
       >>> complex_qs = QuotedString('{{', end_quote_char='}}')
       >>> print(complex_qs.search_string(
       ...     'lsjdf {{This is the "quote"}} sldjf'))
       [['This is the "quote"']]
       >>> sql_qs = QuotedString('"', esc_quote='""')
       >>> print(sql_qs.search_string(
       ...     'lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
       [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace spellings and the characters they stand for
    ws_map = {r"\t": "\t", r"\n": "\n", r"\f": "\f", r"\r": "\r"}

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        **kwargs,
    ) -> None:
        super().__init__()
        # deprecated camelCase keyword arguments
        quoteChar: str = deprecate_argument(kwargs, "quoteChar", "")
        escChar: str = deprecate_argument(kwargs, "escChar", None)
        escQuote: str = deprecate_argument(kwargs, "escQuote", None)
        unquoteResults: bool = deprecate_argument(kwargs, "unquoteResults", True)
        endQuoteChar: typing.Optional[str] = deprecate_argument(
            kwargs, "endQuoteChar", None
        )
        convertWhitespaceEscapes: bool = deprecate_argument(
            kwargs, "convertWhitespaceEscapes", True
        )

        # merge legacy and PEP8-named arguments; the True-by-default flags
        # are combined with ``and`` so either spelling can switch them off
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow proper prefixes of a multi-character end quote, as long
            # as they are not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings may not contain raw newline characters
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # Plain raw string (not an f-string): inside rf"" the
                    # quantifiers {3}, {2} and {4} would be evaluated as
                    # replacement fields and turned into literal digits.
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as err:
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to Regex"
            ) from err

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        if instring[loc] != self.first_quote_char:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result[0]

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[g] if (g := match[1])
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else _convert_escaped_numerics_to_char(g[1:]) if (g := match[2])
                        # match group 3 matches escaped characters
                        else g[-1] if (g := match[3])
                        # match group 4 matches any character
                        else match[4]
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        g[-1] if (g := match[1])
                        # match group 2 matches any character
                        else match[2]
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret
class CharsNotIn(Token):
    """Token for matching runs of characters that are *not* members of a
    given set. Whitespace is included in the match unless it is listed in
    the exclusion set (see example). Construct with a string containing
    every disallowed character, plus optional minimum, maximum, and/or
    exact lengths. ``min`` defaults to 1 (values < 1 are rejected);
    ``max`` and ``exact`` default to 0, meaning no maximum or exact
    length restriction.

    Example:

    .. testcode::

       # define a comma-separated-value as anything that is not a ','
       csv_value = CharsNotIn(',')
       print(
           DelimitedList(csv_value).parse_string(
               "dkls,lsdkjf,s12 34,@!#,213"
           )
       )

    prints:

    .. testoutput::

       ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self, not_chars: str = "", min: int = 1, max: int = 0, exact: int = 0, **kwargs
    ) -> None:
        super().__init__()
        legacy_not_chars: str = deprecate_argument(kwargs, "notChars", "")

        # whitespace may be part of the match, so it must not be skipped
        self.skipWhitespace = False
        self.notChars = not_chars if not_chars else legacy_not_chars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the collapsed form decides whether to abbreviate; the displayed
        # text is taken from the uncollapsed notChars string
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            shown = self.notChars
        else:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))

        # first character already accepted; extend while characters qualify
        loc = begin + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.

    Characters passed in ``ws`` that have no entry in :attr:`whiteStrs`
    are shown in the default name as ``<U+XXXX>`` (their code point).
    """

    # display labels used when generating the default name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00a0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180e": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200a": "<HAIR_SPACE>",
        "\u200b": "<ZERO_WIDTH_SPACE>",
        "\u202f": "<NNBSP>",
        "\u205f": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        """
        :param ws: the whitespace characters this expression matches
        :param min: minimum match length
        :param max: maximum match length; 0 means no upper limit
        :param exact: if > 0, overrides both ``min`` and ``max``
        """
        super().__init__()
        self.matchWhite = ws
        # skip only those known whitespace characters that are NOT being matched
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length pins both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Return the concatenated labels of all characters in ``matchWhite``.

        Characters missing from :attr:`whiteStrs` (for example ``"\\v"``) get
        a ``<U+XXXX>`` label instead of raising ``KeyError``.
        """
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a run of characters taken from ``matchWhite``.

        Returns ``(end_loc, matched_text)``.  Raises ``ParseException`` if the
        character at ``loc`` is not in ``matchWhite`` or if fewer than
        ``minLen`` characters were consumed.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        # never scan past maxLen characters or the end of the input
        maxloc = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class PositionToken(Token):
    """Base class for tokens whose ``__init__`` marks them as possibly
    returning empty and as not raising ``IndexError``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int) -> None:
        """
        :param colno: the target column, compared against ``col(loc, instring)``
        """
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorables and whitespace until the target column is reached.

        Returns ``loc`` unchanged if it is already at the target column.
        Scanning stops at the first non-space character or at end of input.
        """
        if col(loc, instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (
            loc < end
            and instring[loc].isspace()
            and col(loc, instring) != self.col
        ):
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column.

        Raises ``ParseException`` if ``loc`` is already past the target column.
        """
        current_col = col(loc, instring)
        if current_col > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target_loc = loc + self.col - current_col
        return target_loc, instring[loc:target_loc]
class LineStart(PositionToken):
    r"""Matches if current position is at the logical beginning of a line (after skipping whitespace)
    within the parse string

    Example:

    .. testcode::

       test = '''\
       AAA this line
       AAA and this line
         AAA and even this line
         B AAA but definitely not this line
       '''

       for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
           print(t)

    prints:

    .. testoutput::

       ['AAA', ' this line']
       ['AAA', ' and this line']
       ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set before "\n" is removed from it
        self.orig_whiteChars = set() | self.whiteChars
        self.whiteChars.discard("\n")
        # helper expression used to skip whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Advance past skippable whitespace, and past newlines when ``"\n"``
        was in the original whitespace set.  Location 0 is returned as-is.
        """
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)

        if "\n" in self.orig_whiteChars:
            # step over each newline, re-skipping whitespace after it
            while instring[pos : pos + 1] == "\n":
                pos = self.skipper.preParse(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1, else raise
        ``ParseException``.
        """
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # "\n" must not be skipped as whitespace, since it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a ``"\\n"`` at ``loc`` (returning it as the token), or match
        exactly at end of input (returning no tokens).  Anything else raises
        ``ParseException``.
        """
        end = len(instring)
        if loc < end:
            if instring[loc] == "\n":
                return loc + 1, "\n"
            raise ParseException(instring, loc, self.errmsg, self)

        if loc == end:
            return loc + 1, []

        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens if ``loc`` is 0 or is where pre-parsing
        from position 0 ends up; otherwise raise ``ParseException``.
        """
        # anything before loc must be only whitespace and ignorables
        at_start = loc == 0 or loc == self.preParse(instring, 0)
        if not at_start:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, []
class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or beyond the end of ``instring``.

        Exactly at the end the returned location is ``loc + 1``; beyond the
        end ``loc`` is returned unchanged.  Before the end a
        ``ParseException`` is raised.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        if loc > end:
            return loc, []

        # fallback for a loc that compares as none of <, ==, > to the length
        raise ParseException(instring, loc, self.errmsg, self)
class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        """
        :param word_chars: characters considered part of a word
        :param kwargs: accepts the deprecated ``wordChars`` spelling, which
            takes precedence when it differs from ``printables``
        """
        legacy_chars: str = deprecate_argument(kwargs, "wordChars", printables)

        chosen = word_chars if legacy_chars == printables else legacy_chars
        super().__init__()
        self.wordChars = set(chosen)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at location 0, or where the previous
        character is not a word character and the current one is.
        """
        if loc != 0 and (
            instring[loc - 1] in self.wordChars
            or instring[loc] not in self.wordChars
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        """
        :param word_chars: characters considered part of a word
        :param kwargs: accepts the deprecated ``wordChars`` spelling, which
            takes precedence when it differs from ``printables``
        """
        legacy_chars: str = deprecate_argument(kwargs, "wordChars", printables)

        chosen = word_chars if legacy_chars == printables else legacy_chars
        super().__init__()
        self.wordChars = set(chosen)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or past the end of input (or on empty
        input), or where the previous character is a word character and the
        current one is not.
        """
        length = len(instring)
        if length > 0 and loc < length and (
            instring[loc] in self.wordChars
            or instring[loc - 1] not in self.wordChars
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example:

    .. doctest::

       >>> end_punc = "." | ("!" + Tag("enthusiastic"))
       >>> greeting = "Hello," + Word(alphas) + end_punc

       >>> result = greeting.parse_string("Hello, World.")
       >>> print(result.dump())
       ['Hello,', 'World', '.']

       >>> result = greeting.parse_string("Hello, World!")
       >>> print(result.dump())
       ['Hello,', 'World', '!']
       - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        """
        :param tag_name: results name under which the value is stored
        :param value: the value stored under ``tag_name``
        """
        super().__init__()
        self._may_return_empty = True
        self.mayIndexError = False
        self.leave_whitespace()
        self.tag_name = tag_name
        self.tag_value = value
        # the tag itself is inserted by a parse action
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store ``tag_value`` in ``tokens`` under ``tag_name``."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepts a single string, a single ``ParserElement``, a generator, or
        any iterable; strings are wrapped with ``_literalStringClass``.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any strings in the sequence as literals; other items pass through
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in list(exprs)
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """
        Add an expression to the list of expressions related to this ParseExpression instance.
        """
        self.exprs.append(other)
        # cached default name is stale once the contents change
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies of the contained expressions
            self.exprs = [sub.copy() for sub in self.exprs]
            for sub in self.exprs:
                sub.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies of the contained expressions
            self.exprs = [sub.copy() for sub in self.exprs]
            for sub in self.exprs:
                sub.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        # a Suppress that is already registered is not added a second time
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        # propagate the newly registered ignore expression to the contents
        for sub in self.exprs:
            sub.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline contained expressions and flatten directly nested
        expressions of the same class; runs only once per instance.
        """
        if self.streamlined:
            return self

        super().streamline()

        for sub in self.exprs:
            sub.streamline()

        def can_absorb(candidate) -> bool:
            # only merge same-class children that carry no parse actions,
            # results name or debug flag
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if can_absorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if can_absorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self._may_return_empty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; warns, then validates contained expressions and runs
        the recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        trace = (validateTrace if validateTrace is not None else [])[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [sub.copy() for sub in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (if that diagnostic is enabled
        and not suppressed) when a contained expression already has one.
        """
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for e in self.exprs:
                if (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and diag not in e.suppress_warnings_
                ):
                    warning = (
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {e.resultsName!r} on contained expression"
                    )
                    warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
                    # one warning per call is enough
                    break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example:

    .. testcode::

       integer = Word(nums)
       name_expr = Word(alphas)[1, ...]

       expr = And([integer("id"), name_expr("name"), integer("age")])
       # more easily written as:
       expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once seen in ``And.parseImpl``, later failures are
        raised as ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence, wrapping strings as literals and replacing each
        ``...`` with a ``SkipTo`` of the element that follows it.

        Raises ``Exception`` if the sequence ends in ``...``.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(item) if isinstance(item, str) else item
            for item in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            # pair each element with its successor; the final element is untouched
            exprs[:-1] = [
                SkipTo(following)("_skipped*") if current is Ellipsis else current
                for current, following in zip(exprs, exprs[1:])
            ]

        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take whitespace handling from the leading expression
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace
        else:
            self._may_return_empty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """
        Collapse `And` expressions like `And(And(And(A, B), C), D)`
        to `And(A, B, C, D)`.

        .. doctest::

           >>> expr = Word("A") + Word("B") + Word("C") + Word("D")
           >>> # Using '+' operator creates nested And expression
           >>> expr
           {{{W:(A) W:(B)} W:(C)} W:(D)}
           >>> # streamline simplifies to a single And with multiple expressions
           >>> expr.streamline()
           {W:(A) W:(B) W:(C) W:(D)}

        Guards against collapsing out expressions that have special features,
        such as results names or parse actions.

        Resolves pending Skip commands defined using `...` terms.
        """

        def ends_with_pending_skip(candidate) -> bool:
            return bool(
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            # iterate over a snapshot, since self.exprs is modified in the loop
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if ends_with_pending_skip(e):
                    # fold the following expression into the pending skip
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while id(cur) not in seen:
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse each contained expression in order, accumulating tokens.

        After an ``_ErrorStop`` marker, a ``ParseBaseException`` or
        ``IndexError`` from a later expression is re-raised as
        ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        past_error_stop = False
        for e in self.exprs[1:]:
            # exact type check, not isinstance
            if type(e) is And._ErrorStop:
                past_error_stop = True
                continue
            if not past_error_stop:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            else:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """In-place ``+=``: append ``other`` (strings become literals)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        """Check contained expressions, stopping after the first one that
        cannot return empty.
        """
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(trail)
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "{" + inner + "}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example:

    .. testcode::

       # construct Or using '^' operator

       number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
       print(number.search_string("123 3.1416 789"))

    prints:

    .. testoutput::

       [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline, then recompute the flags derived from the alternatives."""
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        # a White alternative turns whitespace skipping off
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the longest successful match.

        Alternatives are first tried with ``try_parse``, then re-parsed from
        longest to shortest.  If nothing matches, raises the furthest-reaching
        fatal exception if any occurred, else the furthest-reaching
        ``ParseException``.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        for e in self.exprs:
            try:
                end_loc = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # once a fatal has been seen, non-fatal errors no longer count
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, e))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = candidates[0][1]
                return best_expr._parse(instring, loc, do_actions)

            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for expected_loc, candidate in candidates:
                if expected_loc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_loc, toks = candidate._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_loc >= expected_loc:
                        return actual_loc, toks
                    # didn't match as much as before
                    elif actual_loc > longest[0]:
                        longest = actual_loc, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda f: -f.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer parser element string
                    fatals.sort(key=lambda f: (-f.loc, -len(str(f.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if furthest_exc is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if furthest_loc == parse_start_loc:
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """In-place ``^=``: append ``other`` (strings become literals)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " ^ ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (if that diagnostic is enabled
        and not suppressed) when any alternative is an ``And``.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

       # watch the order of expressions to match
       >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
       >>> print(number.search_string("123 3.1416 789")) # Fail!
       [['123'], ['3'], ['1416'], ['789']]

       # put more selective expression first
       >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
       >>> print(number.search_string("123 3.1416 789")) # Better
       [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the
        alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        # a White alternative turns whitespace skipping off
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` is re-raised immediately.  If every
        alternative fails, the exception that reached furthest is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for e in self.exprs:
            try:
                return e._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any individual error message
            parse_start_loc = self.preParse(instring, loc)
            if furthest_loc == parse_start_loc:
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """In-place ``|=``: append ``other`` (strings become literals)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " | ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (if that diagnostic is enabled
        and not suppressed) when any alternative is an ``And``.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class Each(ParseExpression):
    """Match every contained :class:`ParserElement`, accepting them in
    whatever order they appear. Whitespace is allowed between them.

    Normally built with the ``'&'`` operator.

    Example:

    .. testcode::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: 'BLACK'
        - posn: ['100', ',', '120']
          - x: '100'
          - y: '120'
        - shape: 'SQUARE'
        ...

        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
         'posn:', ['50', ',', '80']]
        - color: 'BLUE'
        - posn: ['50', ',', '80']
          - x: '50'
          - y: '80'
        - shape: 'CIRCLE'
        - size: '50'
        ...

        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
         'posn:', ['20', ',', '40']]
        - color: 'GREEN'
        - posn: ['20', ',', '40']
          - x: '20'
          - y: '40'
        - shape: 'TRIANGLE'
        - size: '20'
        ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        # all() over an empty list is True, which is also the value wanted
        # when there are no contained expressions
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Support ``each_expr &= other`` by appending ``other`` in place."""
        operand = (
            self._literalStringClass(other) if isinstance(other, str_type) else other
        )
        if isinstance(operand, ParserElement):
            return self.append(operand)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions and refresh the empty-match flag."""
        super().streamline()
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match all required expressions (plus any optional ones present) in
        any order, then re-parse them in the discovered order to build results.

        Raises the furthest-advanced ``ParseFatalException`` if any candidate
        raised one, or ``ParseException`` if a required expression is missing.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            wrapped_opts = [e for e in self.exprs if isinstance(e, Opt)]
            self.opt1map = {id(e.expr): e for e in wrapped_opts}
            implicit_opts = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = [e.expr for e in wrapped_opts] + implicit_opts
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ] + self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        matchOrder: list[ParserElement] = []

        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while True:
            candidates = pending_required + pending_optional + repeatables
            failed.clear()
            fatals.clear()
            for e in candidates:
                try:
                    scan_loc = e.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the original Opt wrapper when one exists
                    matchOrder.append(self.opt1map.get(id(e), e))
                    if e in pending_required:
                        pending_required.remove(e)
                    elif e in pending_optional:
                        pending_optional.remove(e)
            # stop once a full pass over the candidates matched nothing
            if len(failed) == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longest expression description
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder.extend(
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        )

        total_results = ParseResults([])
        for matched in matchOrder:
            loc, piece = matched._parse(instring, loc, do_actions)
            total_results += piece

        return loc, total_results

    def _generateDefaultName(self) -> str:
        joined = " & ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"
class ParseElementEnhance(ParserElement):
    """Abstract base for elements that wrap a single :class:`ParserElement`
    in order to combine or post-process the tokens it produces.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to a parser element
            literal_text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(literal_text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(literal_text)
            else:
                expr = literal_cls(Literal(literal_text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the wrapped expression's parsing settings on this wrapper
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the wrapped expression.

        Raises ``ParseException`` when no expression is defined. Exceptions
        from the wrapped expression have missing fields filled in before
        being re-raised; ``ParseSyntaxException`` propagates untouched.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as exc:
            exc.pstr = exc.pstr or instring
            exc.loc = exc.loc or loc
            exc.parser_element = exc.parser_element or self
            # a custom-named, non-Forward wrapper reports its own message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                exc.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends the base class ``leave_whitespace``; when ``recursive`` is set, it is
        also applied to a copy of the contained expression.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends the base class ``ignore_whitespace``; when ``recursive`` is set, it is
        also applied to a copy of the contained expression.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Register an expression (e.g., a comment pattern) to be skipped while
        matching; may be called repeatedly to register several ignorable
        patterns. The registration is forwarded to the contained expression.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            # already registered - nothing to do
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and, if present, the contained expression."""
        super().streamline()
        inner = self.expr
        if inner is not None:
            inner.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        if self.expr is not None:
            self.expr._checkRecursion(parseElementList[:] + [self])

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        extended_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class IndentedBlock(ParseElementEnhance):
    """
    Matches one or more occurrences of an expression that all start at the
    same indentation column. Intended for text whose structure is expressed
    through indentation (such as Python source code).

    Example:

    .. testcode::

        '''
        BNF:
        statement ::= assignment_stmt | if_stmt
        assignment_stmt ::= identifier '=' rvalue
        rvalue ::= identifier | integer
        if_stmt ::= 'if' bool_condition block
        block ::= ([indent] statement)...
        identifier ::= [A..Za..z]
        integer ::= [0..9]...
        bool_condition ::= 'TRUE' | 'FALSE'
        '''

        IF, TRUE, FALSE = Keyword.using_each("IF TRUE FALSE".split())

        statement = Forward()
        identifier = Char(alphas)
        integer = Word(nums).add_parse_action(lambda t: int(t[0]))
        rvalue = identifier | integer
        assignment_stmt = identifier + "=" + rvalue

        if_stmt = IF + (TRUE | FALSE) + IndentedBlock(statement)

        statement <<= Group(assignment_stmt | if_stmt)

        result = if_stmt.parse_string('''
            IF TRUE
                a = 1000
                b = 2000
                IF FALSE
                    z = 100
        ''')
        print(result.dump())

    .. testoutput::

        ['IF', 'TRUE', [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]]
        [0]:
          IF
        [1]:
          TRUE
        [2]:
          [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]
          [0]:
            ['a', '=', 1000]
          [1]:
            ['b', '=', 2000]
          [2]:
            ['IF', 'FALSE', [['z', '=', 100]]]
            [0]:
              IF
            [1]:
              FALSE
            [2]:
              [['z', '=', 100]]
              [0]:
                ['z', '=', 100]
    """

    class _Indent(Empty):
        """Zero-width match that succeeds only at column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width match that succeeds only at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column that a trailing un-indent is expected to return to
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # skip leading whitespace with an Empty(); the column reached here is
        # the reference column for every following line of the block
        anchor_loc = Empty().preParse(instring, loc)

        # fail fast: if the contained expression does not match here, this
        # raises and nothing more needs to be built
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            inner_expr += Opt(deeper + child_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        full_expr = body + Opt(trailing_undent)
        return full_expr.parseImpl(instring, anchor_loc, do_actions)
class AtStringStart(ParseElementEnhance):
    """Succeeds only when the wrapped expression matches at the very
    start of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # only location 0 is accepted
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")
class AtLineStart(ParseElementEnhance):
    r"""Succeeds only when the wrapped expression matches at the start of
    a line within the parse string

    Example:

    .. testcode::

        test = '''\
        BBB this line
        BBB and this line
          BBB but not this one
        A BBB and definitely not this one
        '''

        for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['BBB', ' this line']
        ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # only column 1 is accepted
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.
    ``FollowedBy`` does *not* move the parse position forward; it
    only checks that the expression matches at the current position.
    The returned token list is always empty, but any results names
    defined inside the lookahead expression *are* kept and can be
    accessed by name.

    Example:

    .. testcode::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(
            label + Suppress(':')
            + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
        )

        attr_expr[1, ...].parse_string(
            "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # parse the lookahead expression, then empty the token list in place;
        # the results object is reused so its named results are retained
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead[:]

        # the incoming location is returned unchanged
        return loc, lookahead
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position. ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        # NOTE(review): calling the element presumably yields a copy, so that
        # leave_whitespace() does not alter the caller's expression - confirm
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False
        # ``exact`` is True when the lookbehind distance is derived from the
        # expression itself rather than taken from the ``retreat`` argument
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that deletes every token from the results list
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Check that the expression matches just before ``loc``.

        Returns the unchanged ``loc`` together with the lookbehind results;
        raises a ``ParseException`` (or the last failure from the windowed
        search) when no lookbehind match is found.
        """
        if self.exact:
            # fixed distance: back up exactly ``retreat`` characters and parse
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        # the match is required to end exactly at ``loc`` (end of the slice)
        test_expr = self.expr + StringEnd()
        instring_slice = instring[max(0, loc - self.retreat) : loc]
        last_expr: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        # try start positions at increasing distance back from the slice end
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                # print('trying', offset, instring_slice, repr(instring_slice[loc - offset:]))
                _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                break
        else:
            # no offset matched (or none was tried): report the last failure
            raise last_expr

        return loc, ret
class Located(ParseElementEnhance):
    """
    Wraps the tokens of the contained expression together with the
    locations in the input string where the match started and ended.

    The following results names are added:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end, matched = self.expr._parse(
            instring, begin, do_actions, callPreParse=False
        )
        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end
        # when named, wrap in a list so the name attaches to the complete group
        return end, ([located] if self.resultsName else located)
class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.
    ``NotAny`` does *not* move the parse position forward; it only
    checks that the expression does *not* match at the current
    position. ``NotAny`` also does *not* skip leading whitespace,
    and it always returns an empty token list. May be constructed
    using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would also propagate to the contained expression
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        unwanted_present = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching of a single expression,
    with an optional ``stop_on`` sentinel that ends the repetition.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # the pre-PEP8 ``stopOn`` keyword is still accepted
        stopOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr)
        self.saveAsList = True
        ender = stopOn or stop_on
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.stopOn(ender)

    def stop_on(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that stops the repetition."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    stopOn = stop_on

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # bind frequently used callables to locals for the loop below
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        check_ender = self.not_ender is not None
        if check_ender:
            try_not_ender = self.not_ender.try_parse
            # must be at least one (but first see if we are the stopOn
            # sentinel; if so, fail)
            try_not_ender(instring, loc)

        loc, tokens = parse_expr(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if check_ender:
                    try_not_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, preloc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition (or sentinel hit) ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for e in [self.expr] + self.expr.recurse():
                if not (isinstance(e, ParserElement) and e.resultsName):
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in e.suppress_warnings_
                ):
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, list_all_matches)
class OneOrMore(_MultipleMatch):
    """
    Matches the given expression one or more times in a row.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word).set_parse_action(' '.join))

        >>> text = "shape: SQUARE posn: upper left color: BLACK"

        # Fail! read 'posn' as data instead of next label
        >>> attr_expr[1, ...].parse_string(text).pprint()
        [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore
        # to avoid reading label string as part of the data
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(
        ...         data_word, stop_on=label).set_parse_action(' '.join))
        >>> OneOrMore(attr_expr).parse_string(text).pprint()  # Better
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        >>> (attr_expr * (1,)).parse_string(text).pprint()
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        return "{" + str(self.expr) + "}..."
class ZeroOrMore(_MultipleMatch):
    """
    Matches the given expression zero or more times in a row.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # the pre-PEP8 ``stopOn`` keyword is still accepted
        stopOn: Union[ParserElement, str] = deprecate_argument(kwargs, "stopOn", None)
        sentinel = stopOn or stop_on

        super().__init__(expr, stop_on=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        try:
            result = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # zero matches is a success: return empty results at the same location
            result = loc, ParseResults([], name=self.resultsName)
        return result

    def _generateDefaultName(self) -> str:
        return "[" + str(self.expr) + "]..."
class DelimitedList(ParseElementEnhance):
    """Helper for a list of expressions separated by a delimiter, which
    defaults to ','. Unless overridden, whitespace and comments may
    appear between the list elements and the delimiters; passing
    ``combine=True`` to the constructor changes this. With
    ``combine`` set to ``True``, the match is returned as one token
    string that includes the delimiters; otherwise the match is
    returned as a list of tokens with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example:

    .. doctest::

        >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
        ParseResults(['aa', 'bb', 'cc'], {})
        >>> DelimitedList(Word(hexnums), delim=':', combine=True
        ...               ).parse_string("AA:BB:CC:DD:EE")
        ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the requested bounds before building anything
        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are kept only when combining into a single token
        if combine or isinstance(delim, Suppress):
            self.delim = delim
        else:
            self.delim = Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # the first item is matched separately, so the (delimiter, item)
        # pair repeats one fewer time than the overall bounds
        repeat_min = self.min - 1
        repeat_max = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            repeat_min,
            repeat_max,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        item = self.content.streamline()
        return f"{item} [{self.raw_delim} {item}]..."
5867class _NullToken:
5868 def __bool__(self):
5869 return False
5871 def __str__(self):
5872 return ""
class Opt(ParseElementEnhance):
    """
    Matches the given expression if it is present, and succeeds without
    consuming anything if it is not.

    :param expr: expression that may be matched at most once
    :param default: (optional) - value to be returned
        if the optional expression is not found.

    Example:

    .. testcode::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
        98765-
             ^
        ParseException: Expected end of text, found '-'  (at char 5), (line:1, col:6)
        FAIL: Expected end of text, found '-'  (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        inner = self.expr
        try:
            return inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue

        # the expression was absent: succeed at the same location
        if fallback is self.__optionalNotMatched:
            return loc, []  # type: ignore[return-value]
        if not inner.resultsName:
            return loc, [fallback]  # type: ignore[return-value]

        # a named expression also reports its default under that name
        tokens = ParseResults([fallback])
        tokens[inner.resultsName] = fallback
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "[" + inner + "]"
# ``Optional`` is a module-level alias bound to the same class object as ``Opt``.
# Type annotations in this module spell the typing construct as ``typing.Optional``.
Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param other: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
        (the skipped text and target expression are returned
        as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
        (typically quoted strings and comments)
        that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
        are not allowed to be included in the skipped text;
        if found before the target expression is found,
        the :class:`SkipTo` is not a match

    Example:

    .. testcode::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints:

    .. testoutput::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pre-PEP8 spelling of ``fail_on``, still accepted through **kwargs
        legacy_fail_on: typing.Optional[Union[ParserElement, str]] = (
            deprecate_argument(kwargs, "failOn", None)
        )

        super().__init__(other)
        fail_expr = legacy_fail_on or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        # a plain string is promoted to a literal-matching expression
        if isinstance(fail_expr, str_type):
            self.failOn = self._literalStringClass(fail_expr)
        else:
            self.failOn = fail_expr
        self.errmsg = f"No match found for {self.expr}"
        # private helper expression that carries the combined ignore list
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for ignorable in self.expr.ignoreExprs:
            self.ignorer.ignore(ignorable)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.

        Returns ``self`` so that calls can be chained, in the same way as
        :meth:`Combine.ignore`.
        """
        super().ignore(expr)
        # keep the private ignorer in step with the updated ignore list
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns ``(new_loc, results)`` where ``results`` holds the skipped
        text, followed by the target's tokens when ``include`` was set.
        Raises :class:`ParseException` if the end of ``instring`` is passed
        without the target matching.
        """
        start = loc
        input_len = len(instring)
        # bind the hot-loop callables to locals once
        parse_target = self.expr._parse
        fail_on_matches = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        skip_ignorables = (
            self.ignorer.try_parse if self.ignorer.ignoreExprs else None
        )

        scan = loc
        while scan <= input_len:
            # a fail_on match ends the scan here; because this is a ``break``,
            # the ``else`` branch of the loop (which raises) is bypassed
            if fail_on_matches is not None and fail_on_matches(instring, scan):
                break

            if skip_ignorables is not None:
                # advance past ignore expressions
                last_scan = scan
                while True:
                    try:
                        scan = skip_ignorables(instring, scan)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if scan == last_scan:
                        break
                    last_scan = scan

            try:
                # probe only: parse actions are not run while searching
                parse_target(instring, scan, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance one character
                scan += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        skipped = ParseResults(instring[start:scan])

        if self.includeMatch:
            # re-parse the target for real, honoring do_actions this time
            scan, matched = parse_target(
                instring, scan, do_actions, callPreParse=False
            )
            skipped += matched

        return scan, skipped
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    instance using the ``'<<'`` operator.

    .. Note::

       Take care when assigning to ``Forward`` not to overlook
       precedence of operators.

       Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

           fwd_expr << a | b | c

       will actually be evaluated as::

           (fwd_expr << a) | b | c

       thereby leaving b and c out as parseable alternatives.
       It is recommended that you explicitly group the values
       inserted into the :class:`Forward`::

           fwd_expr << (a | b | c)

       Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :meth:`ParseResults.pprint` for an example of a recursive
    parser created using :class:`Forward`.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember where this Forward was created, for the warning in __del__
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and return ``self``."""
        # once an expression is attached the creation frame is no longer needed
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        self.expr = other
        # mirror the relevant settings of the attached expression
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # remember the calling line so __or__ can detect `fwd << a | b`
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """In-place form of ``<<``; only handles :class:`ParserElement` operands."""
        if isinstance(other, ParserElement):
            return self << other

        return NotImplemented

    def __or__(self, other) -> ParserElement:
        caller_line = traceback.extract_stack(limit=2)[-2]
        # a '|' on the same source line as the preceding '<<' is almost
        # certainly the operator-precedence mistake described in the class docs
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                PyparsingDiagnosticWarning,
                stacklevel=2,
            )
        return super().__or__(other)

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # report the warning against the line that created this Forward
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            stack = traceback.extract_stack(limit=200)
            # innermost frame first; fall back to 2 if no entry point is on the stack
            stacklevel = next(
                (
                    depth + 1
                    for depth, frame in enumerate(reversed(stack), start=1)
                    if frame.name in parse_fns
                ),
                2,
            )
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                PyparsingDiagnosticWarning,
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                no_base_case = ParseException(
                    instring, loc, "Forward recursion without base case", self
                )
                prev_loc, prev_peek = memo[peek_key] = (loc - 1, no_base_case)
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class.
        """
        # only this element's own flag is changed here; ``recursive`` is not used
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class.
        """
        # only this element's own flag is changed here; ``recursive`` is not used
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        # set the flag before descending into the contained expression
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            extended_trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        saved_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            inner = "None" if self.expr is None else str(self.expr)[:1000]
        except Exception:
            inner = "..."

        self._defaultName = saved_name
        return f"{type(self).__name__}: {inner}"

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        if self.expr is not None:
            return super().copy()

        # nothing attached yet: return a new Forward whose expression is
        # this (still empty) Forward
        wrapper = Forward()
        wrapper <<= self
        return wrapper

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            message = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(message, PyparsingDiagnosticWarning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but not forwarded to the base class
        super().__init__(expr)
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example:

    .. doctest::

        >>> real = Word(nums) + '.' + Word(nums)
        >>> print(real.parse_string('3.1416'))
        ['3', '.', '1416']

        >>> # will also erroneously match the following
        >>> print(real.parse_string('3. 1416'))
        ['3', '.', '1416']

        >>> real = Combine(Word(nums) + '.' + Word(nums))
        >>> print(real.parse_string('3.1416'))
        ['3.1416']

        >>> # no match when there are internal spaces
        >>> print(real.parse_string('3. 1416'))
        Traceback (most recent call last):
        ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # the camelCase keyword, when given, takes priority over ``join_string``
        separator = join_string if joinString is None else joinString
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = separator
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            # adjacent mode calls the ParserElement implementation directly,
            # bypassing the intermediate classes' ignore()
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from an emptied copy of the incoming results, then add
        # back one token holding the concatenated text
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> ident = Word(alphas)
        >>> num = Word(nums)
        >>> term = ident | num
        >>> func = ident + Opt(DelimitedList(term))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', 'a', 'b', '100']

        >>> func = ident + Group(Opt(DelimitedList(term)))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        if not self._asPythonList:
            # default: nest the tokens one level deeper
            return [tokenlist]

        # aslist=True: convert the tokens to a plain list before wrapping
        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.as_list()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')

        >>> attr_expr = (
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join)
        ... )

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> # print attributes as plain groups
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
        # Dict will auto-assign names.
        >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        # access named fields as dict entries, or output as dict
        >>> print(result['shape'])
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        # register every non-empty entry under its first token as the key
        for index, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if entry_len == 1:
                # key with no value: store an empty string
                tokenlist[key] = _ParseResultsWithOffset("", index)
                continue

            if entry_len == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                tokenlist[key] = _ParseResultsWithOffset(entry[1], index)
                continue

            # general case: the value is everything after the key
            try:
                value = entry.copy()
            except Exception:
                exc = TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                )
                raise exc from None

            del value[0]

            # unwrap a lone remaining token, unless the results carry names
            keep_as_results = len(value) != 1 or (
                isinstance(value, ParseResults) and value.haskeys()
            )
            if keep_as_results:
                tokenlist[key] = _ParseResultsWithOffset(value, index)
            else:
                tokenlist[key] = _ParseResultsWithOffset(value[0], index)

        if self._asPythonDict:
            as_dict = tokenlist.as_dict()
            return [as_dict] if self.resultsName else as_dict

        return [tokenlist] if self.resultsName else tokenlist
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example:

    .. doctest::

        >>> source = "a, b, c,d"
        >>> wd = Word(alphas)
        >>> wd_list1 = wd + (',' + wd)[...]
        >>> print(wd_list1.parse_string(source))
        ['a', ',', 'b', ',', 'c', ',', 'd']

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        >>> wd_list2 = wd + (Suppress(',') + wd)[...]
        >>> print(wd_list2.parse_string(source))
        ['a', 'b', 'c', 'd']

        # Skipped text (using '...') can be suppressed as well
        >>> source = "lead in START relevant text END trailing text"
        >>> start_marker = Keyword("START")
        >>> end_marker = Keyword("END")
        >>> find_body = Suppress(...) + start_marker + ... + end_marker
        >>> print(find_body.parse_string(source))
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)``: hold a pending skip until the next operand is known
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # resolve the pending skip now that the target expression is known
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # resolve the pending skip now that the target expression is known
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # discard whatever the contained expression matched
        return []

    def suppress(self) -> ParserElement:
        # already suppressing; no further wrapping is done
        return self
# XXX: Example needs to be re-done for updated output
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator will print
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
    When the parse action completes, the decorator will print
    ``"<<"`` followed by the returned value, or any exception that the parse action raised.

    Example:

    .. testsetup:: stderr

        import sys
        sys.stderr = sys.stdout

    .. testcleanup:: stderr

        sys.stderr = sys.__stderr__

    .. testcode:: stderr

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
        :options: +NORMALIZE_WHITESPACE

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
            0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']

    .. versionchanged:: 3.1.0
        Exception type added to output
    """
    f = _trim_arity(f)

    def traced_action(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument: prefix the label with its type name
            label = f"{type(paArgs[0]).__name__}.{label}"
        emit = sys.stderr.write
        emit(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            result = f(*paArgs)
        except Exception as exc:
            emit(f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n")
            raise
        emit(f"<<leaving {label} (ret: {result!r})\n")
        return result

    traced_action.__name__ = f.__name__
    return traced_action
# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# --- mini-grammar for regex-style "[...]" character sets (used by srange) ---

# backslash-escaped punctuation: keep only the character after the backslash
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape, "\x##" or legacy "\0x##": convert the digits after the x/X marker.
# The digits are sliced out rather than using lstrip(r"\0x"): lstrip removes a
# *set* of characters, so it also ate leading zero digits ("\x00" became "")
# and did not remove an upper-case "X", making int() raise in both cases.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][t[0].lower().index("x") + 1 :], 16))
)
# octal escape "\0##": drop the backslash and read the rest as base 8
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one character of the set: any escape form, or any single char except "\" and "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as a [low, high] pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the whole bracket expression: "[", optional "^", the body, "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        pieces = []
        for part in _reBracketExpr.parse_string(s).body:
            if isinstance(part, ParseResults):
                # a [low, high] pair: expand to every character in between
                low, high = ord(part[0]), ord(part[1])
                pieces.extend(map(chr, range(low, high + 1)))
            else:
                pieces.append(part)
        return "".join(pieces)
    except Exception:
        # any failure yields an empty set rather than an error
        return ""
def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def apply_to_tokens(s, l, t):
        # call func on every token, forwarding the extra positional args
        return [func(token, *args) for token in t]

    # name the action after func; callables without a __name__
    # fall back to the name of their class
    class_name = func.__class__.__name__
    apply_to_tokens.__name__ = getattr(func, "__name__", class_name)

    return apply_to_tokens
def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.
    """

    # guard against _getframe not being implemented in the current Python
    frame_getter = getattr(sys, "_getframe", None)
    caller_frame = frame_getter(1) if frame_getter is not None else None
    if caller_frame is None:
        return

    # find all locals in the calling frame that are ParserElements
    caller_frame = typing.cast(types.FrameType, caller_frame)
    for var_name, value in caller_frame.f_locals.items():
        if not isinstance(value, ParserElement):
            continue
        # if no custom name defined, set the name to the var name
        if not value.customName:
            value.set_name(var_name)
# string delimited by double quotes; a doubled "" or a backslash escape may
# appear inside, an unescaped newline may not
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# same as above, delimited by single quotes
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either of the two forms above
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# triple-quoted and single-line forms; the four alternatives are combined with '^'
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted string with a leading "u" prefix
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets expanded from hex-escape ranges by srange()
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
# build list of built-in expressions, for future reference if a global default value
# gets updated
# (collects every ParserElement bound in this namespace at this point)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# Compatibility synonyms
# fmt: off
# plain aliases for the pre-PEP8 expression names
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
# function synonyms are created through replaced_by_pep8
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on