Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 44%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# core.py
3#
4from __future__ import annotations
6import collections.abc
7from collections import deque
8import os
9import typing
10from typing import (
11 Any,
12 Callable,
13 Generator,
14 NamedTuple,
15 Sequence,
16 TextIO,
17 Union,
18 cast,
19)
20from abc import ABC, abstractmethod
21from enum import Enum
22import string
23import copy
24import warnings
25import re
26import sys
27from collections.abc import Iterable
28import traceback
29import types
30from operator import itemgetter
31from functools import wraps
32from threading import RLock
33from pathlib import Path
35from .warnings import PyparsingDeprecationWarning, PyparsingDiagnosticWarning
36from .util import (
37 _FifoCache,
38 _UnboundedCache,
39 __config_flags,
40 _collapse_string_to_ranges,
41 _convert_escaped_numerics_to_char,
42 _escape_regex_range_chars,
43 _flatten,
44 LRUMemo as _LRUMemo,
45 UnboundedMemo as _UnboundedMemo,
46 deprecate_argument,
47 replaced_by_pep8,
48)
49from .exceptions import *
50from .actions import *
51from .results import ParseResults, _ParseResultsWithOffset
52from .unicode import pyparsing_unicode
54_MAX_INT = sys.maxsize
55str_type: tuple[type, ...] = (str, bytes)
57#
58# Copyright (c) 2003-2022 Paul T. McGuire
59#
60# Permission is hereby granted, free of charge, to any person obtaining
61# a copy of this software and associated documentation files (the
62# "Software"), to deal in the Software without restriction, including
63# without limitation the rights to use, copy, modify, merge, publish,
64# distribute, sublicense, and/or sell copies of the Software, and to
65# permit persons to whom the Software is furnished to do so, subject to
66# the following conditions:
67#
68# The above copyright notice and this permission notice shall be
69# included in all copies or substantial portions of the Software.
70#
71# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
72# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
73# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
74# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
75# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
76# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
77# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
78#
80from functools import cached_property
class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # human-readable label for this group of flags
    # (presumably consumed by the __config_flags base class - confirm in util)
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # Names of all public flags defined so far. This reads the class namespace
    # through locals(), so it must stay AFTER the flag definitions; the loop
    # variable is named "__" so that it can never pass the startswith("_") filter.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # whitespace-separated list of flag names, split into a list of strings
    # (judging by the name, flags that may not be changed - confirm in __config_flags)
    _fixed_names = """
        collect_all_And_tokens
        """.split()
class __diag__(__config_flags):
    # Global on/off switches for pyparsing diagnostics; every flag starts disabled.
    # The flag names mirror the members of the Diagnostics enum, whose ``name``
    # is what enable_diag()/disable_diag() pass to enable()/disable().

    # human-readable label for this group of flags
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # Names of all public flags defined above. This reads the class namespace
    # through locals(), so it must stay AFTER the flag definitions.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # subsets selected purely by name prefix
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        # enable every flag whose name starts with "warn"; debug flags are untouched
        for name in cls._warning_names:
            cls.enable(name)
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - (this member had no description here;
      judging by its name it warns about ``'<<'`` being combined with a :class:`MatchFirst`
      expression - TODO confirm and document precisely)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.
    """

    # Only the member *names* are used by enable_diag()/disable_diag(); the
    # integer values just make the members distinct.
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member whose ``name`` selects
        the flag to enable
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member whose ``name`` selects
        the flag to disable
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
def enable_all_warnings() -> None:
    """
    Switch on every global pyparsing diagnostic *warning* flag
    (see :class:`Diagnostics`); delegates to ``__diag__.enable_all_warnings``.
    """
    enable_warnings = __diag__.enable_all_warnings
    enable_warnings()
# hide abstract class: the concrete flag classes above have already been built
# from it, so drop the imported base-class name from this module's namespace
del __config_flags
187def _should_enable_warnings(
188 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
189) -> bool:
190 enable = bool(warn_env_var)
191 for warn_opt in cmd_line_warn_options:
192 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split(
193 ":"
194 )[:5]
195 if not w_action.lower().startswith("i") and (
196 not (w_message or w_category or w_module) or w_module == "pyparsing"
197 ):
198 enable = True
199 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""):
200 enable = False
201 return enable
# Import-time side effect: enable all diagnostic warnings when the interpreter's
# warning options (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS
# environment variable ask for them.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()
# build the set of single-argument builtins that can be used as parse actions;
# _trim_arity special-cases these and calls them with just the parsed tokens
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# return type of ParserElement.parseImpl: (new location, matched tokens)
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition may accept 0 to 3 of the (string, location, tokens) arguments
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# callback signatures: fail action and the three debug actions
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# character-set constants for building expressions
alphas: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = "0123456789ABCDEFabcdef"
alphanums: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
# every printable ASCII character except space
printables: str = (
    '!"'
    "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
)
248class _ParseActionIndexError(Exception):
249 """
250 Internal wrapper around IndexError so that IndexErrors raised inside
251 parse actions aren't misinterpreted as IndexErrors raised inside
252 ParserElement parseImpl methods.
253 """
255 def __init__(self, msg: str, exc: BaseException) -> None:
256 self.msg: str = msg
257 self.exc: BaseException = exc
# Frame entry for _trim_arity itself, captured the first time _trim_arity runs.
# NOTE(review): annotated as StackSummary, but the value stored below is a single
# element of one (``traceback.extract_stack(limit=2)[-1]``).
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
# (filename, line number) of the arity-probing call to ``func`` inside ``wrapper``
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    The returned wrapper is called with the full argument list and forwards
    ``args[limit:]`` to ``func``. ``limit`` starts at 0 and is increased each
    time the call itself fails with a ``TypeError``, up to ``max_limit``; once a
    call succeeds that ``limit`` is used for all later calls.

    Members of ``_single_arg_builtins`` are not probed; they are wrapped so that
    they receive only the third argument.

    An ``IndexError`` raised by ``func`` is re-raised as
    :class:`_ParseActionIndexError`.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # Look at no more than the first two traceback entries and
                    # compare the last of them with the synthesized location of
                    # the probing call above; a match means the TypeError came
                    # from the call itself rather than from code inside func.
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        # drop one more leading argument and try again
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate (a function returning ``True`` or ``False``) so that it
    can be used wherever a parse action is expected, for example where
    :meth:`ParserElement.add_condition` is not available (such as an operator
    level in :class:`infix_notation`).

    The returned parse action raises when the predicate's result is falsy and
    otherwise returns ``None``.

    :param fn: predicate taking 0 to 3 of the ``(s, loc, toks)`` arguments
    :param message: text for the raised exception; ``None`` selects
        ``"failed user-defined condition"``
    :param fatal: if ``True``, raise :class:`ParseFatalException` to stop
        parsing immediately; otherwise raise :class:`ParseException`
    """
    failure_type = ParseFatalException if fatal else ParseException
    failure_msg = "failed user-defined condition" if message is None else message
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if bool(fn(s, l, t)):
            return
        raise failure_type(s, l, failure_msg)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "about to try" debug action: print the expression, the location
    with its line and column, the text of that line, and a caret under the
    column. The first line is prefixed with ``*`` when ``cache_hit`` is true.
    """
    prefix = "*" if cache_hit else ""
    report_lines = (
        f"{prefix}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})",
        f" {line(loc, instring)}",
        f" {'^':>{col(loc, instring)}}",
    )
    print("\n".join(report_lines))
370def _default_success_debug_action(
371 instring: str,
372 startloc: int,
373 endloc: int,
374 expr: ParserElement,
375 toks: ParseResults,
376 cache_hit: bool = False,
377):
378 cache_hit_str = "*" if cache_hit else ""
379 print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}")
382def _default_exception_debug_action(
383 instring: str,
384 loc: int,
385 expr: ParserElement,
386 exc: Exception,
387 cache_hit: bool = False,
388):
389 cache_hit_str = "*" if cache_hit else ""
390 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}")
def null_debug_action(*args):
    """No-op debug action: accepts any positional arguments and does nothing,
    so it can be installed to silence debugging output during parsing."""
    return None
397class ParserElement(ABC):
398 """Abstract base level parser element class."""
400 DEFAULT_WHITE_CHARS: str = " \n\t\r"
401 verbose_stacktrace: bool = False
402 _literalStringClass: type = None # type: ignore[assignment]
@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of characters treated as skippable whitespace.

    The new value is stored on ``ParserElement.DEFAULT_WHITE_CHARS`` and is
    also applied to every expression in ``_builtin_exprs`` whose
    ``copyDefaultWhiteChars`` flag is set.

    Example:

    .. doctest::

        # default whitespace chars are space, <TAB> and newline
        >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
        ParseResults(['abc', 'def', 'ghi', 'jkl'], {})

        # change to just treat newline as significant
        >>> ParserElement.set_default_whitespace_chars(" \t")
        >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
        ParseResults(['abc', 'def'], {})

        # Reset to default
        >>> ParserElement.set_default_whitespace_chars(" \n\t\r")
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # propagate to the expressions predefined in this module that track the default
    tracking_default = (e for e in _builtin_exprs if e.copyDefaultWhiteChars)
    for builtin in tracking_default:
        builtin.whiteChars = set(chars)
@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class used to wrap plain string literals that are combined
    into a parser; the class is stored on ``ParserElement._literalStringClass``.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        # default literal class used is Literal
        >>> integer = Word(nums)
        >>> date_str = (
        ...     integer("year") + '/'
        ...     + integer("month") + '/'
        ...     + integer("day")
        ... )

        >>> date_str.parse_string("1999/12/31")
        ParseResults(['1999', '/', '12', '/', '31'],
        {'year': '1999', 'month': '12', 'day': '31'})

        # change to Suppress
        >>> ParserElement.inline_literals_using(Suppress)
        >>> date_str = (
        ...     integer("year") + '/'
        ...     + integer("month") + '/'
        ...     + integer("day")
        ... )

        >>> date_str.parse_string("1999/12/31")
        ParseResults(['1999', '12', '31'],
        {'year': '1999', 'month': '12', 'day': '31'})

        # Reset
        >>> ParserElement.inline_literals_using(Literal)
    """
    setattr(ParserElement, "_literalStringClass", cls)
@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Generate ``cls(obj, **class_kwargs)`` for each ``obj`` in ``seq``, lazily
    and in order.

    Example:

    .. testcode::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    .. versionadded:: 3.1.0
    """
    for item in seq:
        yield cls(item, **class_kwargs)
class DebugActions(NamedTuple):
    # Optional debug callbacks for one parser element; ``None`` means "not set".
    # called before a match attempt (see DebugStartAction)
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match (see DebugSuccessAction)
    debug_match: typing.Optional[DebugSuccessAction]
    # called when a match attempt raises (see DebugExceptionAction)
    debug_fail: typing.Optional[DebugExceptionAction]
def __init__(self, savelist: bool = False) -> None:
    """
    Set every per-element parsing attribute to its default.

    :param savelist: initial value of ``saveAsList``
    """
    # callbacks
    self.parseAction: list[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None

    # naming
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    self.saveAsList: bool = savelist

    # whitespace handling
    self.skipWhitespace: bool = True
    self.whiteChars: set[str] = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars: bool = True

    # used when checking for left-recursion
    self._may_return_empty: bool = False
    self.keepTabs: bool = False
    self.ignoreExprs: list[ParserElement] = []
    self.debug: bool = False
    self.streamlined: bool = False

    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError: bool = True
    self.errmsg: Union[str, None] = ""

    # mark results names as modal (report only last) or cumulative (list all)
    self.modalResults: bool = True

    # custom debug actions, none set initially
    self.debugActions = self.DebugActions(
        debug_try=None, debug_match=None, debug_fail=None
    )

    # avoid redundant calls to preParse
    self.callPreparse: bool = True
    self.callDuringTry: bool = False
    self.suppress_warnings_: list[Diagnostics] = []
    self.show_in_diagram: bool = True
@property
def mayReturnEmpty(self) -> bool:
    """
    Read-through alias for ``_may_return_empty``.

    .. deprecated:: 3.3.0
       use _may_return_empty instead.
    """
    current = self._may_return_empty
    return current

@mayReturnEmpty.setter
def mayReturnEmpty(self, value) -> None:
    """
    Write-through alias for ``_may_return_empty``.

    .. deprecated:: 3.3.0
       use _may_return_empty instead.
    """
    setattr(self, "_may_return_empty", value)
def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
    """
    Record ``warning_type`` in this expression's ``suppress_warnings_`` list
    so that the diagnostic is not reported for it, and return ``self`` to
    allow chaining.

    Example:

    .. doctest::

        >>> label = pp.Word(pp.alphas)

        # Normally using an empty Forward in a grammar
        # would print a warning, but we can suppress that
        >>> base = pp.Forward().suppress_warning(
        ...     pp.Diagnostics.warn_on_parse_using_empty_Forward)

        >>> grammar = base | label
        >>> print(grammar.parse_string("x"))
        ['x']
    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self
def visit_all(self):
    """Yield this expression and every sub-expression reachable through
    ``recurse()``, breadth-first, each one only once. Typically just for
    internal use.
    """
    visited = set()
    pending = deque((self,))
    while pending:
        node = pending.popleft()

        # recursive grammars can reach the same node again; visit it only once
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node
def copy(self) -> ParserElement:
    """
    Return a shallow copy of this :class:`ParserElement` that has its own
    ``parseAction`` and ``ignoreExprs`` lists, so that different parse actions
    can be attached to the same parsing pattern. If ``copyDefaultWhiteChars``
    is set, the copy's whitespace characters are taken from the current
    ``ParserElement.DEFAULT_WHITE_CHARS``.

    Example:

    .. testcode::

        integer = Word(nums).set_parse_action(
            lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(
            lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(
            lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print(
            (integerK | integerM | integer)[1, ...].parse_string(
                "5K 100 640K 256M")
        )

    prints:

    .. testoutput::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``:

    .. testcode::

        integerM = integer().add_parse_action(
            lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # give the copy independent lists so later additions don't leak back
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate
def set_results_name(
    self, name: str, list_all_matches: bool = False, **kwargs
) -> ParserElement:
    """
    Attach a results name, under which the matched tokens can be looked up
    in the returned parse results.

    Results names behave like dict keys by default: a later value replaces an
    earlier one. Pass ``list_all_matches=True`` to keep every value captured
    under the name instead.

    NOTE: ``set_results_name`` returns a *copy* of the original
    :class:`ParserElement` object, so that one basic element (such as an
    integer) can be reused in several places under different names.

    The abbreviated form ``expr("name")`` is equivalent to
    ``expr.set_results_name("name")`` - see :meth:`__call__`. If
    ``list_all_matches`` is required, use ``expr("name*")``.

    The legacy keyword ``listAllMatches`` is still accepted and is combined
    with ``list_all_matches``.

    Example:

    .. testcode::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    legacy_flag: bool = deprecate_argument(kwargs, "listAllMatches", False)
    return self._setResultsName(name, legacy_flag or list_all_matches)
655 def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
656 if name is None:
657 return self
658 newself = self.copy()
659 if name.endswith("*"):
660 name = name[:-1]
661 list_all_matches = True
662 newself.resultsName = name
663 newself.modalResults = not list_all_matches
664 return newself
def set_break(self, break_flag: bool = True) -> ParserElement:
    """
    Enable or disable a drop into the Python debugger just before this
    element is parsed.

    With ``break_flag`` true, ``_parse`` is replaced by a wrapper that calls
    ``breakpoint()`` and then the previous parse method. With ``break_flag``
    false, a previously installed wrapper is removed again; if none is
    installed nothing changes. Returns ``self``.
    """
    current = self._parse

    if not break_flag:
        if hasattr(current, "_originalParseMethod"):
            self._parse = current._originalParseMethod  # type: ignore [method-assign]
        return self

    _parseMethod = current

    def breaker(instring, loc, do_actions=True, callPreParse=True):
        # this call to breakpoint() is intentional, not a checkin error
        breakpoint()
        return _parseMethod(instring, loc, do_actions, callPreParse)

    # remember what was wrapped so that it can be restored later
    breaker._originalParseMethod = _parseMethod  # type: ignore [attr-defined]
    self._parse = breaker  # type: ignore [method-assign]
    return self
def set_parse_action(
    self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
) -> ParserElement:
    """
    Replace this expression's parse actions with ``fns``; they run when the
    expression has matched successfully.

    Parse actions can convert data, perform extra validation, update external
    data structures, or enhance or replace the parsed tokens. Each ``fn`` is a
    callable taking 0-3 arguments, called as ``fn(s, loc, toks)``,
    ``fn(loc, toks)``, ``fn(toks)``, or just ``fn()``, where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens may be modified in place (list-style append, extend and pop;
    dict-style item set and del for named results), in which case nothing
    needs to be returned. A parse action may also return a replacement:
    another ``ParseResults`` or any other object (common for data
    conversions). :class:`ParseResults.from_dict` is a convenient way to build
    a new result from a dict.

    Passing ``None`` as the only ``fn`` clears all previously added parse
    actions for this expression.

    Optional keyword arguments:

    :param call_during_try: (default= ``False``) whether the parse actions
        should also run during lookaheads and alternate testing. Leave it
        ``False`` for parse actions with side effects, so that they only run
        as part of a successful parse; pass ``True`` for parse actions that
        perform validation, so that the validation takes part in the
        preliminary "try" parses. The legacy keyword ``callDuringTry`` is
        still accepted.

    :raises TypeError: if any ``fn`` is not callable

    .. Note::
       The default parsing behavior is to expand tabs in the input string
       before starting the parsing process.
       See :meth:`parse_string` for more information on parsing strings
       containing ``<TAB>`` s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and
       line and column positions within the parsed string.

    Example: Parse dates in the form ``YYYY/MM/DD``
    -----------------------------------------------

    Setup code:

    .. testcode::

        def convert_to_int(toks):
            '''a parse action to convert toks from str to int
            at parse time'''
            return int(toks[0])

        def is_valid_date(instring, loc, toks):
            '''a parse action to verify that the date is a valid date'''
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

    Successful parse - note that integer fields are converted to ints:

    .. testcode::

        print(date_str.parse_string("1999/12/31"))

    prints:

    .. testoutput::

        [1999, '/', 12, '/', 31]

    Failure - invalid date:

    .. testcode::

        date_str.parse_string("1999/13/31")

    prints:

    .. testoutput::

        Traceback (most recent call last):
        ParseException: invalid date given, found '1999' ...
    """
    legacy_during_try: bool = deprecate_argument(kwargs, "callDuringTry", False)

    # a lone None means "remove all parse actions"
    if list(fns) == [None]:
        self.parseAction.clear()
        return self

    for candidate in fns:
        if not callable(candidate):
            raise TypeError("parse actions must be callable")

    self.parseAction[:] = [_trim_arity(action) for action in fns]
    # the flag is sticky: once enabled it stays enabled
    self.callDuringTry = self.callDuringTry or call_during_try or legacy_during_try

    return self
def add_parse_action(
    self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
) -> ParserElement:
    """
    Append one or more parse actions to this expression's existing list of
    parse actions. See :meth:`set_parse_action` for the accepted call
    signatures and the meaning of ``call_during_try`` (the legacy keyword
    ``callDuringTry`` is still accepted).

    See examples in :meth:`copy`.
    """
    legacy_during_try: bool = deprecate_argument(kwargs, "callDuringTry", False)

    trimmed = [_trim_arity(action) for action in fns]
    self.parseAction.extend(trimmed)
    # the flag is sticky: once enabled it stays enabled
    self.callDuringTry = self.callDuringTry or legacy_during_try or call_during_try
    return self
def add_condition(
    self, *fns: ParseCondition, call_during_try: bool = False, **kwargs: Any
) -> ParserElement:
    """Add a boolean predicate function to expression's list of parse actions. See
    :meth:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception;
      when omitted (or ``None``) the default message of
      :func:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Returns ``self``.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        >>> year_int = integer.copy().add_condition(
        ...     lambda toks: toks[0] >= 2000,
        ...     message="Only support years 2000 and later")
        >>> date_str = year_int + '/' + integer + '/' + integer

        >>> result = date_str.parse_string("1999/12/31")
        Traceback (most recent call last):
        ParseException: Only support years 2000 and later...
    """
    callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

    # Only stringify a message that was actually supplied: str(None) would
    # produce the literal text "None" and hide the default message that
    # condition_as_parse_action substitutes when message is None.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    # the flag is sticky: once enabled it stays enabled
    self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry
    return self
def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
    """
    Register the action to run when parsing fails at this expression.
    The fail action ``fn`` is a callable taking the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns ``self``."""
    setattr(self, "failAction", fn)
    return self
876 def _skipIgnorables(self, instring: str, loc: int) -> int:
877 if not self.ignoreExprs:
878 return loc
879 exprsFound = True
880 ignore_expr_fns = [e._parse for e in self.ignoreExprs]
881 last_loc = loc
882 while exprsFound:
883 exprsFound = False
884 for ignore_fn in ignore_expr_fns:
885 try:
886 while 1:
887 loc, dummy = ignore_fn(instring, loc)
888 exprsFound = True
889 except ParseException:
890 pass
891 # check if all ignore exprs matched but didn't actually advance the parse location
892 if loc == last_loc:
893 break
894 last_loc = loc
895 return loc
def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should start: ``loc`` moved past
    ignorable expressions (when any are configured) and then past leading
    whitespace characters (when ``skipWhitespace`` is set).
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Base implementation of the match step: consume nothing and produce no
    tokens, returning ``loc`` unchanged together with a new empty list."""
    no_tokens: list = []
    return loc, no_tokens
def postParse(self, instring, loc, tokenlist):
    """Post-processing hook applied to the tokens returned by ``parseImpl``;
    the base implementation hands ``tokenlist`` back unchanged."""
    unchanged = tokenlist
    return unchanged
# @profile
def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Core parse step for one element: pre-parse, match, post-process, wrap the
    tokens in a :class:`ParseResults`, and run the parse actions.

    The match step is written twice: a slower path used when debugging or a
    fail action is configured (which reports failures to those hooks), and a
    lean path used otherwise.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param do_actions: run parse actions; when false they still run if
        ``self.callDuringTry`` is set
    :param callPreParse: call ``preParse`` first (only if ``self.callPreparse``
        is also set)
    :returns: ``(location after the match, ParseResults)``
    :raises ParseException: when ``parseImpl`` raises ``IndexError`` (reported
        at the end of ``instring``), or when a parse action raises
        ``IndexError``; other exceptions from ``parseImpl`` and the parse
        actions propagate
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            # only guard against IndexError where one is possible
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            # print("Exception raised:", err)
            # NOTE(review): tokens_start is unbound here if preParse itself
            # raised - confirm whether that can happen in practice
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # same match step as above, without the debug/fail hooks
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, aslist=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns a new value replaces the
                    # results; returning None or the same object keeps them
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            aslist=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            # same parse-action loop as above, without the debug hook
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        aslist=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens
def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and report only where it ends.

    :param raise_fatal: when true, a ``ParseFatalException`` is re-raised as
        is; otherwise it is replaced by a plain ``ParseException`` at ``loc``
    :param do_actions: forwarded to ``_parse``
    :returns: the location following the match
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return outcome[0]
def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``.

    :returns: ``False`` when ``try_parse`` raises ``ParseException`` or
        ``IndexError``; ``True`` otherwise
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True
# Shared state for left-recursive parsing of Forward references: a re-entrant
# lock and the memo table. Per the annotation, each key is an
# (int, Forward, bool) triple and each value pairs an int with either a
# ParseResults or the Exception that was recorded.
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}
1046 class _CacheType(typing.Protocol):
1047 """
1048 Class to be used for packrat and left-recursion cacheing of results
1049 and exceptions.
1050 """
1052 not_in_cache: bool
1054 def get(self, *args) -> typing.Any: ...
1056 def set(self, *args) -> None: ...
1058 def clear(self) -> None: ...
class NullCache(dict):
    """
    Inert placeholder used to initialize the ``packrat_cache`` class variable.
    Its ``get``, ``set`` and ``clear`` do nothing; once ``enable_packrat()`` is
    called it is swapped for a real ``_CacheType`` instance.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        return None

    def set(self, *args) -> None:
        return None

    def clear(self) -> None:
        return None
# Class-wide memoization state used to speed up repeated parse attempts while
# backtracking through recursive expressions: the cache itself (an inert
# NullCache until packrat parsing is enabled), the lock guarding it, and a
# two-slot counter list indexed as [hits, misses].
packrat_cache: _CacheType = NullCache()
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]
1081 # this method gets repeatedly called during backtracking with the same arguments -
1082 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Packrat front end for ``_parseNoCache``.

    The lookup key is ``(self, instring, loc, callPreParse, do_actions)``. On
    a miss the expression is parsed and the outcome stored: either a
    ``(end location, copy of the results, loc)`` tuple or a traceback-free
    copy of the ``ParseBaseException``. On a hit the stored outcome is
    replayed, returning a fresh copy of the results or raising the stored
    exception.

    :returns: ``(location after the match, ParseResults)``
    :raises ParseBaseException: when the parse fails, freshly or from cache
    """
    HIT, MISS = 0, 1
    cache_key = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(cache_key)

        if cached is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                fresh = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as parse_err:
                # cache a copy of the exception, without the traceback
                cache.set(cache_key, parse_err.__class__(*parse_err.args))
                raise
            cache.set(cache_key, (fresh[0], fresh[1].copy(), loc))
            return fresh

        ParserElement.packrat_cache_stats[HIT] += 1
        # The debug callbacks are invoked with a ``cache_hit`` keyword and a
        # TypeError from the call is ignored.
        # NOTE(review): presumably so callbacks written without that
        # parameter keep working - confirm.
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(cached, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, cached, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise cached

        cached = cast(tuple[int, ParseResults, int], cached)
        # stored layout: (end of match, results, location the parse started from)
        end_loc, result, start_loc = cached[0], cached[1].copy(), cached[2]
        if self.debug and self.debugActions.debug_match:
            try:
                # report (start, end) in the same order _parseNoCache uses
                self.debugActions.debug_match(
                    instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return end_loc, result
# Active parse entry point. It starts out bound to the uncached
# implementation; enable_packrat() rebinds it to _parseCache and
# disable_memoization() restores this binding.
_parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters, and empty the
    left-recursion memo table.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        stats = ParserElement.packrat_cache_stats
        # slice-assign so the existing list object is updated in place
        stats[:] = [0] * len(stats)
        ParserElement.recursion_memos.clear()
1145 # class attributes to keep caching status
1146 _packratEnabled = False
1147 _left_recursion_enabled = False
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing, whichever is active, and
    discard what they have memoized.

    Calling this when neither mode is enabled is harmless, so it can be used
    to clear any earlier settings before activating Packrat or Left Recursion.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.reset_cache()
        ParserElement._packratEnabled = False
        ParserElement._left_recursion_enabled = False
        # route parsing back through the uncached implementation
        ParserElement._parse = ParserElement._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, a left-recursive :class:`Forward`
    element is matched repeatedly with a fixed recursion depth that is raised
    step by step until the longest match is found.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-evaluated during backtracking. Programs whose parse
    actions depend on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing is similar but not identical to Packrat parsing,
    so the two cannot be active together. Pass ``force=True`` to disable any
    previous, conflicting settings.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo_table = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo_table = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo_table  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which memoizes the parsing logic. Parse
    attempts repeated at the same string location (common in complex
    grammars) return a cached value at once instead of re-running the
    parsing/validating code. Both successful results and parsing exceptions
    are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is first
    imported. To activate it, your program must call the class method
    :class:`ParserElement.enable_packrat`. For best results, call
    ``enable_packrat()`` immediately after importing pyparsing.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar but not identical to Bounded Recursion parsing,
    so the two cannot be active together. Pass ``force=True`` to disable any
    previous, conflicting settings.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        # already active: leave the existing cache (and its size) in place
        if ParserElement._packratEnabled:
            return

        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        ParserElement._parse = ParserElement._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, **kwargs
) -> ParseResults:
    """
    Parse ``instring`` from its start using this parser definition. This is
    the primary interface intended for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the whole input matches the grammar, set ``parse_all`` to ``True``. Doing so is
    equivalent to ending the grammar with :class:`StringEnd`\\ ().

    So that column numbers are reported properly, ``parse_string`` works on a copy of the input in which
    every tab has been converted to spaces (8 spaces per tab, the ``string.expandtabs`` default). If the
    input contains tabs and the grammar's parse actions use the ``loc`` argument to index into the string
    being parsed, keep the view of the input consistent by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    parseAll: bool = deprecate_argument(kwargs, "parseAll", False)
    parse_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if parse_all:
            # skip trailing ignorables, then insist that the input is exhausted
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    return tokens
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    **kwargs,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Walk through the input string looking for matches of this expression,
    yielding the matching tokens, start location, and end location of each.
    The optional ``max_matches`` argument stops the scan after 'n' matches.
    If ``overlap`` is specified, overlapping matches are reported as well.

    Start and end locations are relative to the string being parsed. See
    :class:`parse_string` for more information on parsing strings with
    embedded tabs.

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    max_matches = min(maxMatches, max_matches)

    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    text_len = len(instring)

    if always_skip_whitespace:
        # a throwaway Empty() carrying this expression's ignorables and
        # whitespace set does the skipping between match attempts
        skipper = Empty()
        skipper.ignoreExprs = self.ignoreExprs
        skipper.whiteChars = self.whiteChars
        skip_ahead = skipper.preParse
    else:
        skip_ahead = self.preParse
    parse_at = self._parse

    ParserElement.reset_cache()
    cursor = 0
    found = 0
    try:
        while cursor <= text_len and found < max_matches:
            try:
                match_start: int = skip_ahead(instring, cursor)
                match_end: int
                tokens: ParseResults
                match_end, tokens = parse_at(
                    instring, match_start, callPreParse=False
                )
            except ParseException:
                # no match here; retry one character further on
                cursor = match_start + 1
                continue

            if match_end <= cursor:
                # a match that does not advance past the cursor is not reported
                cursor = match_start + 1
                continue

            found += 1
            if debug:
                print(
                    {
                        "tokens": tokens.as_list(),
                        "start": match_start,
                        "end": match_end,
                    }
                )
            yield tokens, match_start, match_end

            if not overlap:
                cursor = match_end
            else:
                skipped_to = skip_ahead(instring, cursor)
                cursor = match_end if skipped_to > cursor else cursor + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matched text using the
    modified tokens returned from a parse action. To use it, define a grammar
    and attach a parse action that modifies the returned token list. Calling
    ``transform_string()`` on a target string then scans for matches and
    replaces each matched text pattern according to the parse action's logic,
    returning the transformed string.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
        made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
        Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    copied_up_to = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    # (the flag is set on this expression and is not reset afterwards)
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # carry over the untouched text lying before this match
            if start > copied_up_to:
                pieces.append(instring[copied_up_to:start])
            copied_up_to = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces += toks.as_list()
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[copied_up_to:])
        kept = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(kept))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens
    of every match of this expression. The optional ``max_matches`` argument
    stops the search after 'n' matches are found.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    maxMatches: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    max_matches = min(maxMatches, max_matches)

    try:
        hits = self.scan_string(
            instring,
            max_matches=max_matches,
            always_skip_whitespace=False,
            debug=debug,
        )
        # keep only the tokens of each (tokens, start, end) triple
        return ParseResults([toks for toks, _start, _end in hits])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Generator that splits a string wherever this expression matches, treating
    the matches as separators. The optional ``maxsplit`` argument limits the
    number of splits; the optional ``include_separators`` argument
    (default= ``False``) controls whether the separating matching text is
    included in the split results.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    includeSeparators: bool = deprecate_argument(kwargs, "includeSeparators", False)
    include_separators = includeSeparators or include_separators

    resume_at = 0
    for sep_tokens, sep_start, sep_end in self.scan_string(
        instring, max_matches=maxsplit
    ):
        yield instring[resume_at:sep_start]
        if include_separators:
            yield sep_tokens[0]
        resume_at = sep_end
    # whatever follows the final separator
    yield instring[resume_at:]
def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. Strings added to
    a :class:`ParserElement` are converted to :class:`Literal`\\ s by default.

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented
def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`
    """
    if other is Ellipsis:
        # ``... + expr``: skip ahead to expr, recording the skipped text
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented
def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented
def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented
def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. An expression may also be multiplied by a
    2-integer tuple, similar to ``{min, max}`` multipliers in regular
    expressions. Tuples may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``
    """
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        required, optional = other, 0
    else:
        # normalise to a (lower, upper) pair, with None meaning "unbounded"
        bounds = tuple(None if item is Ellipsis else item for item in other)
        lower, upper = (bounds + (None, None))[:2]
        if lower is None:
            lower = 0
        if not isinstance(lower, int):
            return NotImplemented
        if upper is None:
            if lower == 0:
                return ZeroOrMore(self)
            if lower == 1:
                return OneOrMore(self)
            return self * lower + ZeroOrMore(self)
        if not isinstance(upper, int):
            return NotImplemented
        required, optional = lower, upper - lower

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    def nested_optional(count):
        # Opt(self + Opt(self + ... Opt(self))), ``count`` levels deep,
        # assembled from the innermost level outwards
        chain = Opt(self)
        for _ in range(count - 1):
            chain = Opt(self + chain)
        return chain

    if not optional:
        return self if required == 1 else And([self] * required)
    if not required:
        return nested_optional(optional)
    mandatory = self if required == 1 else And([self] * required)
    return mandatory + nested_optional(optional)
1767 def __rmul__(self, other) -> ParserElement:
1768 return self.__mul__(other)
def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        if other == "":
            # `expr | ""` is equivalent to `Opt(expr)`
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented
def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented
def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented
def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented
def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented
def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented
def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny`
    """
    negated = NotAny(self)
    return negated
# Setting __iter__ to None makes instances explicitly non-iterable, so Python
# does not fall back to the legacy protocol of iterating by calling
# __getitem__ with successive integer indexes.
__iter__ = None
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream. If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    .. versionchanged:: 3.1.0
       Support for slice notation.
    """

    has_stop = False
    stop_expr = NoMatch()
    if isinstance(key, slice):
        # expr[count : stop]  (a missing count means "...")
        key, stop_expr = key.start, key.stop
        if key is None:
            key = ...
        has_stop = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max : stop]
        trailing = key[1]
        key, stop_expr = (key[0], trailing.start), trailing.stop
        has_stop = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop:
        repeated.stopOn(stop_expr)

    return repeated
1911 def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
1912 """
1913 Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.
1915 If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
1916 passed as ``True``.
1918 If ``name`` is omitted, same as calling :class:`copy`.
1920 Example:
1922 .. testcode::
1924 # these are equivalent
1925 userdata = (
1926 Word(alphas).set_results_name("name")
1927 + Word(nums + "-").set_results_name("socsecno")
1928 )
1930 userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
1931 """
1932 if name is not None:
1933 return self._setResultsName(name)
1935 return self.copy()
def suppress(self) -> ParserElement:
    """
    Wraps this :class:`ParserElement` in :class:`Suppress` so that its output
    is suppressed; useful to keep punctuation from cluttering up returned output.
    """
    silenced = Suppress(self)
    return silenced
def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turns on the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any)
    """
    # ``recursive`` is accepted but not used by this implementation
    setattr(self, "skipWhitespace", True)
    return self
def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turns off the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern. This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any)
    """
    # ``recursive`` is accepted but not used by this implementation
    setattr(self, "skipWhitespace", False)
    return self
def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars.

    Enables whitespace skipping, stores ``chars`` as a set in ``whiteChars``, and
    records ``copy_defaults`` in ``copyDefaultWhiteChars``. Returns ``self``.
    """
    self.skipWhitespace = True
    self.whiteChars, self.copyDefaultWhiteChars = set(chars), copy_defaults
    return self
def parse_with_tabs(self) -> ParserElement:
    """
    Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    Sets the ``keepTabs`` flag on this element and returns ``self``.
    """
    setattr(self, "keepTabs", True)
    return self
def ignore(self, other: ParserElement) -> ParserElement:
    """
    Register an expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    Example:

    .. doctest::

        >>> patt = Word(alphas)[...]
        >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
        ['ablaj']

        >>> patt = Word(alphas)[...].ignore(c_style_comment)
        >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
        ['ablaj', 'lskjd']
    """
    # plain strings are promoted to a Suppress expression first
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        # any other expression is copied and wrapped so its tokens are suppressed
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        # a Suppress is stored as-is, but only once
        self.ignoreExprs.append(other)
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching.
    Any action passed as a falsy value is replaced with the module's default action.

    :param start_action: method to be called when an expression is about to be parsed;
                         should have the signature::

                             fn(input_string: str,
                                location: int,
                                expression: ParserElement,
                                cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
                           should have the signature::

                               fn(input_string: str,
                                  start_location: int,
                                  end_location: int,
                                  expression: ParserElement,
                                  parsed_tokens: ParseResults,
                                  cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
                             should have the signature::

                                 fn(input_string: str,
                                    location: int,
                                    expression: ParserElement,
                                    exception: Exception,
                                    cache_hit: bool)
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]

    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    # debugging is on as soon as any action is installed
    self.debug = any(self.debugActions)
    return self
def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Turn display of debugging messages while doing pattern matching on or off.
    Pass ``flag`` as ``True`` to enable, ``False`` to disable.
    Pass ``recurse`` as ``True`` to set the debug flag on this expression and all sub-expressions.

    Example:

    .. testcode::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE

        Match alphaword at loc 0(1,1)
          abc 123 xyz 890
          ^
        Matched alphaword -> ['abc']
        Match alphaword at loc 4(1,5)
          abc 123 xyz 890
              ^
        Match alphaword failed, ParseException raised: Expected alphaword, ...
        Match alphaword at loc 8(1,9)
          abc 123 xyz 890
                  ^
        Matched alphaword -> ['xyz']
        Match alphaword at loc 12(1,13)
          abc 123 xyz 890
                      ^
        Match alphaword failed, ParseException raised: Expected alphaword, ...
        abc 123 xyz 890
                       ^
        Match alphaword failed, ParseException raised: Expected alphaword, found end of text ...

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :meth:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :meth:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # apply the flag to every expression yielded by visit_all(), one level at a time
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling debug installs the default actions
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self
@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression; computed by ``_generateDefaultName()``
    on first access and cached in ``_defaultName``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Abstract hook: each child class must implement this method to produce the
    string used as its ``default_name``.
    """
def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a name to this expression, to make debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example:

    .. doctest::

        >>> integer = Word(nums)
        >>> integer.parse_string("ABC")
        Traceback (most recent call last):
        ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

        >>> integer.set_name("integer")
        integer
        >>> integer.parse_string("ABC")
        Traceback (most recent call last):
        ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # rebuild the error message after the name change, since str(self) reflects it
    self.errmsg = "Expected " + str(self)

    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        # naming turns debug on; clearing the name turns it back off
        self.set_debug(name is not None)

    return self
@property
def name(self) -> str:
    """
    The user-defined name when one has been set, otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is not None:
        return custom
    return self.default_name

@name.setter
def name(self, new_name) -> None:
    # assignment goes through set_name()
    self.set_name(new_name)
def __str__(self) -> str:
    """Return this expression's ``name``."""
    display_name = self.name
    return display_name
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text
def streamline(self) -> ParserElement:
    """
    Flag this element as streamlined and clear the cached default name;
    returns ``self``.
    """
    # clearing the cache makes ``default_name`` regenerate it on next access
    self._defaultName = None
    self.streamlined = True
    return self
def recurse(self) -> list[ParserElement]:
    """
    Return the elements that ``_checkRecursion`` should descend into; this
    implementation returns a new empty list.
    """
    return list()
def _checkRecursion(self, parseElementList):
    """
    Call ``_checkRecursion`` on every element returned by ``recurse()``, passing a
    copy of ``parseElementList`` with ``self`` appended.
    """
    # build a new list so the caller's list is not modified
    trail = parseElementList[:] + [self]
    for child in self.recurse():
        child._checkRecursion(trail)
def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
        Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Always emits a :class:`PyparsingDeprecationWarning` before running the check.
    ``validateTrace`` is accepted but not used.
    """
    deprecation_message = (
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    )
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_message, PyparsingDeprecationWarning, stacklevel=2)
    self._checkRecursion([])
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Run the parse expression on the contents of the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The legacy ``parseAll`` keyword is still accepted and is OR-ed with ``parse_all``.
    """
    parseAll: bool = deprecate_argument(kwargs, "parseAll", False)

    parse_all = parse_all or parseAll
    try:
        # first assume a file-like object with a read() method
        contents = typing.cast(TextIO, file_or_filename).read()
    except AttributeError:
        # no read() available, so treat the argument as a path to open
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as f:
            contents = f.read()

    try:
        return self.parse_string(contents, parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object is always equal
    - a string (``str_type``) is equal if this expression matches it with ``parse_all=True``
    - another :class:`ParserElement` is equal if both have equal instance attributes
    - anything else is not equal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity
def matches(self, test_string: str, parse_all: bool = True, **kwargs) -> bool:
    """
    Quick check of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match
    :param parse_all: flag to pass to :meth:`parse_string` when running tests

    The legacy ``parseAll`` keyword is still accepted and is AND-ed with ``parse_all``.

    Example:

    .. doctest::

        >>> expr = Word(nums)
        >>> expr.matches("100")
        True
    """
    parseAll: bool = deprecate_argument(kwargs, "parseAll", True)

    parse_all = parse_all and parseAll
    try:
        self.parse_string(str(test_string), parse_all=parse_all)
    except ParseBaseException:
        return False
    else:
        return True
def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are legacy spellings; each one is combined with its
    snake_case counterpart, and the combined value is the one that takes effect.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Passing example:

    .. testcode::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            # negative decimal number without leading digit
            -.100
            ''')
        print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        # negative decimal number without leading digit
        -.100
        [-0.1]
        Success

    Failure-test example:

    .. testcode::

        result = number_expr.run_tests('''
            # stray character
            100Z
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # stray character
        100Z
        100Z
           ^
        ParseException: Expected end of text, found 'Z' ...

        # too many '.'
        3.14.159
        3.14.159
            ^
        ParseException: Expected end of text, found '.' ...
        FAIL: Expected end of text, found '.' ...
        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

        expr = Word(alphanums)[1,...]
        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE
        :hide:


        this is a test\\n of strings that spans \\n 3 lines
        ['this', 'is', 'a', 'test', 'of', 'strings', 'that', 'spans', '3', 'lines']

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each legacy camelCase argument with its snake_case counterpart;
    # from here on only the merged camelCase locals are used
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a multiline string is split into one stripped test per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # matches a literal backslash-n marker (outside quoted strings) and replaces it with a newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines, and blank lines that follow collected comments, are
        # accumulated and emitted ahead of the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue

        if not t:
            continue

        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            # use the merged flag so that a legacy parseAll=False is honored
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    # a failing callback is reported, but does not fail the test
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create/overwrite that file with the generated HTML
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    # we were passed a file-like object, just write to it
    output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))
# Compatibility synonyms
# Each pre-PEP8 camelCase name below is bound to the result of
# replaced_by_pep8(<old name>, <snake_case method>).
# NOTE(review): replaced_by_pep8 comes from .util and is not shown here; presumably it
# returns a wrapper that forwards to the snake_case method - confirm there.
# fmt: off
# synonyms for class-level configuration functions, re-wrapped as staticmethods
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# synonyms for instance methods
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# plain alias: defaultName is the same property object as default_name
defaultName = default_name
# fmt: on
class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element;
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        # the expression that '...' was attached to
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render "<anchor> + Empty()" and show the Empty as "..."
        combined = self.anchor + Empty()
        return str(combined).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # skipped text is collected under the "_skipped" results name (all matches kept)
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the placeholder token
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match and nothing was skipped: record what was missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a pending skip must be resolved by a following "+" before it can be parsed
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, used as the base for
    atomic matching patterns.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # default name is the concrete class's own name
        cls = type(self)
        return cls.__name__
class NoMatch(Token):
    """
    A token that never matches; parsing it always raises :class:`ParseException`.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)
class Literal(Token):
    """
    Token that matches one specified string exactly.

    Example:

    .. doctest::

        >>> Literal('abc').parse_string('abc')
        ParseResults(['abc'], {})
        >>> Literal('abc').parse_string('abcdef')
        ParseResults(['abc'], {})
        >>> Literal('abc').parse_string('ab')
        Traceback (most recent call last):
        ParseException: Expected 'abc', found 'ab' (at char 0), (line: 1, col: 1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", **kwargs):
        # Performance tuning: select a subclass with optimized parseImpl
        target = cls
        if cls is Literal:
            matchString: str = deprecate_argument(kwargs, "matchString", "")

            match_string = matchString or match_string
            if not match_string:
                # empty string: build an Empty instead
                target = Empty
            elif len(match_string) == 1:
                # one character: build the single-character specialization
                target = _SingleCharLiteral

        # Default behavior when no specialization applies
        return super().__new__(target)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", **kwargs) -> None:
        matchString: str = deprecate_argument(kwargs, "matchString", "")

        super().__init__()
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        # self.name is derived from self.match, so this must follow the assignments above
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before calling startswith
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class Empty(Literal):
    """
    An empty token; it always matches and returns no tokens.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # both arguments are accepted but not used; the literal is always ""
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # location is returned unchanged, with an empty token list
        no_tokens = []
        return loc, no_tokens
class _SingleCharLiteral(Literal):
    # Literal specialization for one-character match strings (selected in Literal.__new__)
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # one character comparison decides the match
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Set Literal as ParserElement's _literalStringClass, now that Literal is defined.
# NOTE(review): presumably this is the class used when plain strings are promoted to
# parser elements - confirm against where _literalStringClass is read.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> Keyword("start").parse_string("start")
        ParseResults(['start'], {})
        >>> Keyword("start").parse_string("starting")
        Traceback (most recent call last):
        ParseException: Expected Keyword 'start', keyword was immediately
        followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> Keyword("start").parse_string("starting").debug()
        Traceback (most recent call last):
        ParseException: Expected Keyword "start", keyword was immediately
        followed by keyword character, found 'ing' ...

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        **kwargs,
    ) -> None:
        # legacy camelCase spellings are still accepted via **kwargs
        matchString = deprecate_argument(kwargs, "matchString", "")
        identChars = deprecate_argument(kwargs, "identChars", None)

        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done in upper case on both sides
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.ident_chars = set(identChars)

    @property
    def identChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use ident_chars instead.

        Property returning the characters being used as keyword characters for this expression.
        """
        return self.ident_chars

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that it is neither preceded nor
        followed by one of the keyword characters in ``ident_chars``.

        Returns ``(loc + matchLen, match)`` on success, otherwise raises
        :class:`ParseException`. When the text matched but a boundary check failed,
        the message says which side failed and the error location points at the
        offending character.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        # read the set once, directly, rather than through the deprecated
        # ``identChars`` property on every membership test
        ident_chars = self.ident_chars
        match_len = self.matchLen

        if self.caseless:
            if instring[loc : loc + match_len].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in ident_chars:
                    if (
                        loc >= len(instring) - match_len
                        or instring[loc + match_len].upper() not in ident_chars
                    ):
                        return loc + match_len, self.match

                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + match_len
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and match_len == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in ident_chars:
                if (
                    loc >= len(instring) - match_len
                    or instring[loc + match_len] not in ident_chars
                ):
                    return loc + match_len, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + match_len
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )
class CaselessLiteral(Literal):
    """
    Token that matches a specified string while ignoring the case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example:

    .. doctest::

        >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", **kwargs) -> None:
        matchString: str = deprecate_argument(kwargs, "matchString", "")

        match_string = matchString or match_string
        # the parent stores the upper-cased form, which parseImpl compares against
        super().__init__(match_string.upper())
        # Preserve the defining literal.
        self.returnString = match_string
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        # return the literal as originally given, not the input's casing
        return end, self.returnString
class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example:

    .. doctest::

        >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self, match_string: str = "", ident_chars: typing.Optional[str] = None, **kwargs
    ) -> None:
        # pull out the pre-PEP8 keyword spellings first; a truthy legacy
        # value wins over its snake_case counterpart
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        legacy_ident: typing.Optional[str] = deprecate_argument(
            kwargs, "identChars", None
        )

        super().__init__(
            legacy_match or match_string,
            legacy_ident or ident_chars,
            caseless=True,
        )
class CloseMatch(Token):
    """A variation on :class:`Literal` which accepts "close" matches,
    that is, input text that differs from the target string in at most
    'n' character positions.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> patt = CloseMatch("ATCATCGAATGGA")
        >>> patt.parse_string("ATCATCGAAXGGA")
        ParseResults(['ATCATCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

        >>> patt.parse_string("ATCAXCGAAXGGA")
        Traceback (most recent call last):
        ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
        found 'ATCAXCGAAXGGA' (at char 0), (line:1, col:1)

        # exact match
        >>> patt.parse_string("ATCATCGAATGGA")
        ParseResults(['ATCATCGAATGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': []})

        # close match allowing up to 2 mismatches
        >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        >>> patt.parse_string("ATCAXCGAAXGGA")
        ParseResults(['ATCAXCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        caseless=False,
        **kwargs,
    ) -> None:
        # legacy keyword supplies the fallback (default 1) that is used
        # only when max_mismatches was not given explicitly
        fallback_limit: int = deprecate_argument(kwargs, "maxMismatches", 1)

        super().__init__()
        self.match_string = match_string
        self.maxMismatches = (
            fallback_limit if max_mismatches is None else max_mismatches
        )
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        target = self.match_string
        stop = begin + len(target)

        # not enough input left to hold the whole target string
        if stop > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        ignore_case = self.caseless
        allowed = self.maxMismatches
        mismatches = []
        # stays 0 if the loop body never runs (empty target string)
        last_index = 0

        for last_index, (src, mat) in enumerate(zip(instring[begin:stop], target)):
            if ignore_case:
                src, mat = src.lower(), mat.lower()

            if src == mat:
                continue

            mismatches.append(last_index)
            if len(mismatches) > allowed:
                # too many differences - report failure at the start location
                raise ParseException(instring, loc, self.errmsg, self)

        loc = begin + last_index + 1
        results = ParseResults([instring[begin:loc]])
        results["original"] = target
        results["mismatches"] = mismatches
        return loc, results
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

        # a word composed of digits
        integer = Word(nums)
        # Two equivalent alternate forms:
        Word("0123456789")
        Word(srange("[0-9]"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral
        # (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
        Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        # pre-PEP8 keyword spellings; a truthy legacy value takes precedence
        initChars: typing.Optional[str] = deprecate_argument(kwargs, "initChars", None)
        bodyChars: typing.Optional[str] = deprecate_argument(kwargs, "bodyChars", None)
        asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        excludeChars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            # excluded characters are removed from both character sets
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.init_chars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set: body characters are the initial characters
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides whatever min/max were given
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # Internal code reads self.init_chars directly: the deprecated
        # ``initChars`` property builds a new set copy on every access.
        init_set = self.init_chars
        body_set = self.bodyChars

        # see if we can make a regex for this Word
        if " " not in (init_set | body_set):
            if len(init_set) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(init_set)}]"

            if body_set == init_set:
                # single character class, repeated
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # one leading character followed by a repeated body class, so
                # the body repeat counts are one less than the word lengths
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(body_set)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    @property
    def initChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use `init_chars` instead.

        Property returning the initial chars to be used when matching this
        Word expression. If no body chars were specified, the initial characters
        will also be the body characters.

        A new ``set`` copy is returned on each access.
        """
        return set(self.init_chars)

    def copy(self) -> Word:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            # re-bind the regex fast path to the copy rather than the original
            ret.re_match = self.re_match
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        def charsAsStr(s):
            # abbreviate long character sets to at most max_repr_len characters
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        init_set = self.init_chars
        if init_set != self.bodyChars:
            base = f"W:({charsAsStr(init_set)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(init_set)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # membership test against the stored set; avoids the per-call set
        # copy made by the deprecated ``initChars`` property
        if instring[loc] not in self.init_chars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            # too short
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # an explicit max was given and the word continues past it
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            # keyword mode: must not be adjacent to another body character
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result[0]
class Char(Word):
    """Convenience class equivalent to :class:`Word` ``(characters, exact=1)``,
    for matching exactly one character taken from a string of
    candidate characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        # pre-PEP8 keyword spellings; a truthy legacy value takes precedence
        legacy_keyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        legacy_exclude: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        super().__init__(
            charset,
            exact=1,
            as_keyword=legacy_keyword or as_keyword,
            exclude_chars=legacy_exclude or exclude_chars,
        )
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ module for an
    explanation of the acceptable patterns and flags.

    ``pattern`` may also be a callable taking no arguments and returning
    a string or a compiled RE object; it is not called until the compiled
    expression is first needed.

    Example:

    .. testcode::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept regular expressions compiled using the
        # re module
        import re
        parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        **kwargs,
    ) -> None:
        super().__init__()
        # pre-PEP8 keyword spellings; a truthy legacy value takes precedence
        asGroupList: bool = deprecate_argument(kwargs, "asGroupList", False)
        asMatch: bool = deprecate_argument(kwargs, "asMatch", False)

        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first read
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # already-compiled pattern object (duck-typed, so other RE
            # implementations are accepted as well)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the parseImpl variant matching the requested result form;
        # as_match wins if both flags are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self) -> Regex:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Regex = cast(Regex, super().copy())
        # re-bind the instance-level parseImpl override to the copy
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch  # type: ignore [method-assign]
        return ret

    def _probe_may_return_empty(self, compiled) -> None:
        """
        Record whether ``compiled`` matches the empty string, unless the
        flag has already been determined or explicitly set.
        """
        if self._may_return_empty is not None:
            return
        try:
            self._may_return_empty = compiled.match("") is not None
        except TypeError:
            # pattern cannot be applied to a str (e.g. compiled from bytes);
            # leave the flag undetermined
            pass

    @cached_property
    def re(self) -> re.Pattern:
        """
        Property returning the compiled regular expression for this Regex.

        On first access a string pattern is compiled (a callable pattern is
        called first), and the may-return-empty flag is determined by
        matching the compiled expression against the empty string.

        Generally only used internally by pyparsing.

        :raises ValueError: if the pattern string cannot be compiled
        """
        if self._re:
            # a compiled pattern was passed to the constructor
            self._probe_may_return_empty(self._re)
            return self._re

        if callable(self.pattern):
            # replace self.pattern with the string returned by calling self.pattern()
            self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, we're done
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
                self._probe_may_return_empty(self._re)
                return self._re

            # keep reString in step with the now-resolved pattern string
            self.reString = self.pattern

        try:
            self._re = re.compile(self.pattern, self.flags)
        except re.error as err:
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err

        # use the freshly compiled object directly rather than re-entering
        # this property getter
        self._may_return_empty = self._re.match("") is not None
        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled regular expression."""
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether this expression can match the empty string."""
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        # show single backslashes in the name instead of repr()'s doubled ones
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result[0])
        d = result.groupdict()

        # named groups become named results
        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Variant of :meth:`parseImpl` that returns the match's ``groups()`` tuple."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Variant of :meth:`parseImpl` that returns the match object itself."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        :raises TypeError: if this expression was created with
            ``as_group_list=True``, or with ``as_match=True`` and ``repl``
            is a callable

        Example:

        .. testcode::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))

        .. testoutput::

            <h1>main title</h1>
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:
            # tokens[0] is the match object, so expand the template on it

            def pa(tokens):
                return tokens[0].expand(repl)

        else:
            # tokens[0] is the matched text, so re-apply the pattern to it

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
        ``unquote_results`` is ``False``.

    :raises ValueError: if ``quote_char`` or ``end_quote_char`` is empty
        after stripping whitespace, or if the generated regular expression
        cannot be compiled

    Example:

    .. doctest::

        >>> qs = QuotedString('"')
        >>> print(qs.search_string('lsjdf "This is the quote" sldjf'))
        [['This is the quote']]
        >>> complex_qs = QuotedString('{{', end_quote_char='}}')
        >>> print(complex_qs.search_string(
        ...     'lsjdf {{This is the "quote"}} sldjf'))
        [['This is the "quote"']]
        >>> sql_qs = QuotedString('"', esc_quote='""')
        >>> print(sql_qs.search_string(
        ...     'lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
        [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace source text -> the whitespace character it denotes
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        **kwargs,
    ) -> None:
        super().__init__()
        # pre-PEP8 keyword spellings
        quoteChar: str = deprecate_argument(kwargs, "quoteChar", "")
        escChar: str = deprecate_argument(kwargs, "escChar", None)
        escQuote: str = deprecate_argument(kwargs, "escQuote", None)
        unquoteResults: bool = deprecate_argument(kwargs, "unquoteResults", True)
        endQuoteChar: typing.Optional[str] = deprecate_argument(
            kwargs, "endQuoteChar", None
        )
        convertWhitespaceEscapes: bool = deprecate_argument(
            kwargs, "convertWhitespaceEscapes", True
        )

        # string-valued options: a truthy legacy value takes precedence;
        # boolean options default to True, so either spelling can turn them off
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of a multi-character end quote, as long
            # as it is not followed by the remainder of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line: additionally exclude newline and carriage return
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # plain raw string (not an f-string): {3}, {2} and {4}
                    # are regex repeat counts and must reach re.compile intact
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as err:
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to Regex"
            ) from err

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            instring[loc] == self.first_quote_char
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result[0]

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[g] if (g := match[1])
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else _convert_escaped_numerics_to_char(g[1:]) if (g := match[2])
                        # match group 3 matches escaped characters
                        else g[-1] if (g := match[3])
                        # match group 4 matches any character
                        else match[4]
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        g[-1] if (g := match[1])
                        # match group 2 matches any character
                        else match[2]
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret
class CharsNotIn(Token):
    """Token for matching runs of characters that are *not* in a given
    set (whitespace is included in the matched text unless it is listed
    in the exclusion set - see example). Defined with a string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example:

    .. testcode::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(
            DelimitedList(csv_value).parse_string(
                "dkls,lsdkjf,s12 34,@!#,213"
            )
        )

    prints:

    .. testoutput::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self, not_chars: str = "", min: int = 1, max: int = 0, exact: int = 0, **kwargs
    ) -> None:
        super().__init__()
        legacy_not_chars: str = deprecate_argument(kwargs, "notChars", "")

        # whitespace may be part of the match, so it must not be skipped
        self.skipWhitespace = False
        self.notChars = not_chars or legacy_not_chars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the length test is made on the range-collapsed form, while the
        # text shown comes from the raw exclusion string
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # never scan past the maximum length or the end of the input
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.

    Characters in ``ws`` that have no entry in :attr:`whiteStrs` are
    matched like any other; in the default name they are shown as
    ``<U+XXXX>`` (their code point in hex).
    """

    # display labels used to build the default name of a White expression
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00a0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180e": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200a": "<HAIR_SPACE>",
        "\u200b": "<ZERO_WIDTH_SPACE>",
        "\u202f": "<NNBSP>",
        "\u205f": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        """Match a run of the characters in ``ws``.

        ``min`` is the minimum run length.  A ``max`` of 0 (or less) means
        no upper bound.  A positive ``exact`` overrides both.
        """
        super().__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading
        # whitespace, so only the *other* known whitespace characters
        # remain skippable
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        self._may_return_empty = True
        # NOTE(review): self.name presumably resolves through
        # _generateDefaultName, which reads matchWhite set above
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Return the concatenated labels of the matched characters.

        Characters without an entry in ``whiteStrs`` get a ``<U+XXXX>``
        label instead of raising ``KeyError``.
        """
        labels = White.whiteStrs
        return "".join(
            labels.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a run of ``matchWhite`` characters starting at ``loc``.

        Returns ``(end, text)``.  Raises ``ParseException`` if the
        character at ``loc`` is not one of ``matchWhite`` or the run is
        shorter than ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = min(start + self.maxLen, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class PositionToken(Token):
    """Common base of the tokens in this module that test a position in
    the input (such as :class:`LineStart` or :class:`StringEnd`).

    Instances are flagged as possibly returning an empty match and as
    not raising ``IndexError``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int) -> None:
        """Remember ``colno`` as the target column."""
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and whitespace, stopping early if
        the target column is reached.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume the text between ``loc`` and the target column.

        Raises ``ParseException`` if ``loc`` is already past the target
        column.
        """
        advance = self.col - col(loc, instring)
        if advance < 0:
            raise ParseException(instring, loc, "Text not in expected column", self)
        end = loc + advance
        return end, instring[loc:end]
class LineStart(PositionToken):
    r"""Matches if current position is at the logical beginning of a line
    (after skipping whitespace) within the parse string

    Example:

    .. testcode::

        test = '''\
        AAA this line
        AAA and this line
          AAA and even this line
          B AAA but definitely not this line
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['AAA', ' this line']
        ['AAA', ' and this line']
        ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # keep a snapshot of the whitespace set before "\n" is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used only to skip whitespace other than "\n"
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Advance past whitespace; newlines are stepped over one at a
        time, and only when ``"\n"`` was in the original whitespace set.
        """
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" not in self.orig_whiteChars:
            return pos

        while instring[pos : pos + 1] == "\n":
            pos = skip(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # "\n" is what this token matches, so it must not be skipped as whitespace
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc`` (returned as the token ``"\\n"``) or
        the end of the string (no tokens); otherwise raise
        ``ParseException``.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed at location 0, or at the location that pre-parsing from
        location 0 arrives at (i.e. everything before ``loc`` is skippable).
        """
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or past the end of
        ``instring``; raise ``ParseException`` otherwise.

        Exactly at the end, the returned location is one past it; when
        ``loc`` is already past the end it is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end is the only remaining case
        return loc, []
class WordStart(PositionToken):
    """Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        """Store the word characters as a set.

        The value obtained from ``deprecate_argument`` for the legacy
        ``wordChars`` keyword is used only when it differs from
        ``printables``; otherwise ``word_chars`` is used.
        """
        legacy_chars: str = deprecate_argument(kwargs, "wordChars", printables)
        chosen = legacy_chars if legacy_chars != printables else word_chars
        super().__init__()
        self.wordChars = set(chosen)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at location 0, or where the previous
        character is not a word character and the current one is.
        """
        if loc == 0:
            return loc, []

        word_chars = self.wordChars
        if instring[loc - 1] in word_chars or instring[loc] not in word_chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class WordEnd(PositionToken):
    """Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.

    At location 0 of a non-empty string there is no preceding character,
    so no word can end there and the match fails.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        """Store the word characters as a set.

        The value obtained from ``deprecate_argument`` for the legacy
        ``wordChars`` keyword is used only when it differs from
        ``printables``; otherwise ``word_chars`` is used.
        """
        wordChars: str = deprecate_argument(kwargs, "wordChars", printables)

        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or past the end of ``instring``, or
        where the previous character is a word character and the current
        one is not; raise ``ParseException`` otherwise.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                # guard first: instring[loc - 1] at loc == 0 would wrap
                # around to the last character of the input
                loc == 0
                or instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example:

    .. doctest::

        >>> end_punc = "." | ("!" + Tag("enthusiastic"))
        >>> greeting = "Hello," + Word(alphas) + end_punc

        >>> result = greeting.parse_string("Hello, World.")
        >>> print(result.dump())
        ['Hello,', 'World', '.']

        >>> result = greeting.parse_string("Hello, World!")
        >>> print(result.dump())
        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        """Create a tag that stores ``value`` under ``tag_name`` in the
        parse results.
        """
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self.mayIndexError = False
        self._may_return_empty = True
        self.show_in_diagram = False
        self.leave_whitespace()
        # the tagging itself is done by a parse action on this element
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        """Parse action: record the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        """Return ``<ClassName>:<tag_name>=<repr of tag value>``."""
        class_name = type(self).__name__
        return f"{class_name}:{self.tag_name}={self.tag_value!r}"
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepted forms: a single string (wrapped with the literal string
        class), a single ``ParserElement``, a generator or other iterable
        (string items are wrapped individually), or any other object
        (tried as ``list(exprs)``, else stored as a one-element list).
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]

        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any plain strings in the sequence; other items pass through
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in list(exprs)
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]

        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """
        Add an expression to the list of expressions related to this ParseExpression instance.
        """
        self.exprs.append(other)
        # the cached default name no longer reflects the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if not recursive:
            return self

        # operate on copies so the originally supplied expressions are not modified
        self.exprs = [sub.copy() for sub in self.exprs]
        for sub in self.exprs:
            sub.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)

        if not recursive:
            return self

        # operate on copies so the originally supplied expressions are not modified
        self.exprs = [sub.copy() for sub in self.exprs]
        for sub in self.exprs:
            sub.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        # a Suppress that is already registered is not added a second time
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for sub in self.exprs:
            sub.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        """Return ``<ClassName>:(<list of contained expressions>)``."""
        class_name = type(self).__name__
        return f"{class_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions and flatten a nested
        expression of the same class at either end of a two-element list.

        ``And(And(And(a, b), c), d)`` becomes ``And(a, b, c, d)`` (likewise
        for :class:`Or` and :class:`MatchFirst`), but only when the nested
        expression has no parse action, no results name and no debug flag.
        """
        if self.streamlined:
            return self

        super().streamline()

        for sub in self.exprs:
            sub.streamline()

        def mergeable(candidate) -> bool:
            # same class, and nothing attached that flattening would lose
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        if len(self.exprs) == 2:
            head = self.exprs[0]
            if mergeable(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            # re-read the last element: the list may just have been rebuilt
            tail = self.exprs[-1]
            if mergeable(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self._may_return_empty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits ``PyparsingDeprecationWarning``, then validates
        each contained expression and runs the recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        prior = [] if validateTrace is None else validateTrace
        trace = prior[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        duplicate = typing.cast(ParseExpression, super().copy())
        # the copy gets its own copies of the contained expressions
        duplicate.exprs = [sub.copy() for sub in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (at most once) if a contained
        expression already has a results name and the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and not suppressed.
        """
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for sub in self.exprs:
                if (
                    isinstance(sub, ParserElement)
                    and sub.resultsName
                    and diag not in sub.suppress_warnings_
                ):
                    warning = (
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {sub.resultsName!r} on contained expression"
                    )
                    warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
                    break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example:

    .. testcode::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once ``And.parseImpl`` passes it, failures of
        later elements are raised as ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence from ``exprs_arg``.

        Strings are wrapped with the literal string class.  Each
        ``Ellipsis`` item is replaced by a ``SkipTo`` of the item that
        follows it (named ``"_skipped*"``); an ``Ellipsis`` in last
        position raises ``Exception``.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(item) if isinstance(item, str) else item
            for item in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            # pair each element with its successor; the last element has
            # no successor and is left in place by the slice assignment
            exprs[:-1] = [
                SkipTo(following)("_skipped*") if current is Ellipsis else current
                for current, following in zip(exprs, exprs[1:])
            ]

        super().__init__(exprs, savelist)

        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take the whitespace handling of the first element
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace

        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """
        Collapse `And` expressions like `And(And(And(A, B), C), D)`
        to `And(A, B, C, D)`.

        .. doctest::

            >>> expr = Word("A") + Word("B") + Word("C") + Word("D")
            >>> # Using '+' operator creates nested And expression
            >>> expr
            {{{W:(A) W:(B)} W:(C)} W:(D)}
            >>> # streamline simplifies to a single And with multiple expressions
            >>> expr.streamline()
            {W:(A) W:(B) W:(C) W:(D)}

        Guards against collapsing out expressions that have special features,
        such as results names or parse actions.

        Resolves pending Skip commands defined using `...` terms.
        """

        def ends_with_pending_skip(expr) -> bool:
            return bool(
                isinstance(expr, ParseExpression)
                and expr.exprs
                and isinstance(expr.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            # iterates a snapshot of the list while self.exprs is updated in place
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if ends_with_pending_skip(e):
                    # the pending skip absorbs the following element, whose
                    # slot is then marked for removal
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while id(cur) not in seen:
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # when prev matches, store its column on the block
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                next_first = next(iter(cur.recurse()), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse each contained expression in order and concatenate the
        results.

        After an ``_ErrorStop`` marker has been passed, a failure of a
        later expression is raised as ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        past_error_stop = False
        for expr in self.exprs[1:]:
            # exact type test: only the marker class itself counts as a stop
            if type(expr) is And._ErrorStop:
                past_error_stop = True
                continue

            if not past_error_stop:
                loc, exprtokens = expr._parse(instring, loc, do_actions)
            else:
                try:
                    loc, exprtokens = expr._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Implement ``+=``: append ``other`` (strings are wrapped first)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        """Run the recursion check on contained expressions, up to and
        including the first one that cannot return empty.
        """
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr._checkRecursion(trail)
            if not expr.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        """Return the contained expressions' names, space separated, inside
        a single pair of braces.
        """
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return f"{{{inner}}}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example:

    .. testcode::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints:

    .. testoutput::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline, then recompute the flags derived from the
        alternatives.
        """
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        Raises the fatal exception with the greatest location if any
        alternative raised one, otherwise the ordinary failure with the
        greatest location, otherwise a "no defined alternatives"
        ``ParseException``.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []

        if all(alt.callPreparse for alt in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: find out how far each alternative would match
        for alt in self.exprs:
            try:
                predicted_end = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                fatals.append(pfe)
                # ordinary failures recorded so far are discarded
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((predicted_end, alt))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, do_actions)

            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for predicted_end, alt in candidates:
                if predicted_end <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_end, toks = alt._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_end >= predicted_end:
                        return actual_end, toks
                    # didn't match as much as before
                    if actual_end > longest[0]:
                        longest = actual_end, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer parser element name
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if furthest_exc is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if furthest_loc == parse_start_loc:
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Implement ``^=``: append ``other`` (strings are wrapped first)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        """Return the alternatives joined by ``" ^ "`` inside braces."""
        joined = " ^ ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning if any alternative is an
        ``And`` and the ``warn_multiple_tokens_in_named_alternation``
        diagnostic is enabled and not suppressed.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

        # watch the order of expressions to match
        >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        >>> print(number.search_string("123 3.1416 789")) # Fail!
        [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        >>> print(number.search_string("123 3.1416 789")) # Better
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the
        alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` propagates immediately.  If every
        alternative fails, the failure with the greatest location is
        raised; with no alternatives at all, a "no defined alternatives"
        ``ParseException`` is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        parse_start_loc = self.preParse(instring, loc)
        if furthest_loc == parse_start_loc:
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Implement ``|=``: append ``other`` (strings are wrapped first)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        """Return the alternatives joined by ``" | "`` inside braces."""
        joined = " | ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning if any alternative is an
        ``And`` and the ``warn_multiple_tokens_in_named_alternation``
        diagnostic is enabled and not suppressed.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class Each(ParseExpression):
    """Match every one of the given :class:`ParserElement` s, accepting
    them in whatever order they appear in the input. The individual
    expressions may be separated by whitespace.

    An ``Each`` is normally built with the ``'&'`` operator.

    Example:

    .. testcode::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: 'BLACK'
        - posn: ['100', ',', '120']
          - x: '100'
          - y: '120'
        - shape: 'SQUARE'
        ...

        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
         'posn:', ['50', ',', '80']]
        - color: 'BLUE'
        - posn: ['50', ',', '80']
          - x: '50'
          - y: '80'
        - shape: 'CIRCLE'
        - size: '50'
        ...

        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
         'posn:', ['20', ',', '40']]
        - color: 'GREEN'
        - posn: ['20', ',', '40']
          - x: '20'
          - y: '40'
        - shape: 'TRIANGLE'
        - size: '20'
        ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        # all() over an empty list is True, which is also the value wanted
        # when no sub-expressions were given
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built on first parse
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``each &= other`` by appending ``other`` to this Each."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions, then recompute whether
        this expression as a whole can match without returning tokens."""
        super().streamline()
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Find a matching order for the sub-expressions with ``try_parse``,
        then parse them for real in that order and merge the results.

        Raises the best ParseFatalException collected in the final matching
        pass, or a ParseException if required elements remain unmatched.
        """
        if self.initExprGroups:
            opt_wrappers = [e for e in self.exprs if isinstance(e, Opt)]
            # map from the id of an Opt's inner expression back to the Opt
            self.opt1map = {id(opt.expr): opt for opt in opt_wrappers}
            unwrapped_opts = [opt.expr for opt in opt_wrappers]
            implicit_opts = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = unwrapped_opts + implicit_opts
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        match_order: list[ParserElement] = []

        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while True:
            candidates = pending_required + pending_optional + repeatables
            failed.clear()
            fatals.clear()
            for candidate in candidates:
                try:
                    scan_loc = candidate.try_parse(
                        instring, scan_loc, raise_fatal=True
                    )
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = candidate
                    fatals.append(pfe)
                    failed.append(candidate)
                except ParseException:
                    failed.append(candidate)
                else:
                    # record the Opt wrapper, if any, rather than its inner expr
                    match_order.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            # stop once a full pass makes no progress at all
            if len(failed) == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda exc: -exc.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer expression description
                    fatals.sort(
                        key=lambda exc: (-exc.loc, -len(str(exc.parser_element)))
                    )
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        total_results = ParseResults([])
        for matched in match_order:
            loc, results = matched._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        joined = " & ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"
class ParseElementEnhance(ParserElement):
    """Abstract :class:`ParserElement` subclass that wraps a single
    contained expression, for combining and post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to a parser element
            literal_text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(literal_text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(literal_text)
            else:
                expr = literal_cls(Literal(literal_text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the parsing attributes of the wrapped expression
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the contained expression as a one-element list, or an
        empty list if none is defined."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the contained expression, filling in any
        missing details on exceptions that propagate out of it."""
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            # syntax errors pass through untouched
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            has_custom_name = (
                not isinstance(self, Forward) and self.customName is not None
            )
            if has_custom_name and self.errmsg:
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class; when ``recursive``
        is true, also invokes ``leave_whitespace`` on a copy of the contained
        expression.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class; when ``recursive``
        is true, also invokes ``ignore_whitespace`` on a copy of the contained
        expression.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        # a Suppress that is already registered is not added a second time
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then its contained expression."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        visited = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(visited)

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a PyparsingDeprecationWarning, then validates
        the contained expression and checks for recursion."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []
        trace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class IndentedBlock(ParseElementEnhance):
    """
    Expression matching one or more repetitions of an expression that all
    start at the same indentation column. Useful for parsing text where
    structure is implied by indentation (like Python source code).

    Example:

    .. testcode::

        '''
        BNF:
        statement ::= assignment_stmt | if_stmt
        assignment_stmt ::= identifier '=' rvalue
        rvalue ::= identifier | integer
        if_stmt ::= 'if' bool_condition block
        block ::= ([indent] statement)...
        identifier ::= [A..Za..z]
        integer ::= [0..9]...
        bool_condition ::= 'TRUE' | 'FALSE'
        '''

        IF, TRUE, FALSE = Keyword.using_each("IF TRUE FALSE".split())

        statement = Forward()
        identifier = Char(alphas)
        integer = Word(nums).add_parse_action(lambda t: int(t[0]))
        rvalue = identifier | integer
        assignment_stmt = identifier + "=" + rvalue

        if_stmt = IF + (TRUE | FALSE) + IndentedBlock(statement)

        statement <<= Group(assignment_stmt | if_stmt)

        result = if_stmt.parse_string('''
            IF TRUE
                a = 1000
                b = 2000
                IF FALSE
                    z = 100
            ''')
        print(result.dump())

    .. testoutput::

        ['IF', 'TRUE', [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]]
        [0]:
          IF
        [1]:
          TRUE
        [2]:
          [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]
          [0]:
            ['a', '=', 1000]
          [1]:
            ['b', '=', 2000]
          [2]:
            ['IF', 'FALSE', [['z', '=', 100]]]
            [0]:
              IF
            [1]:
              FALSE
            [2]:
              [['z', '=', 100]]
              [0]:
                ['z', '=', 100]
    """

    class _Indent(Empty):
        # zero-width match that succeeds only at exactly column ref_col
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # zero-width match that succeeds only at a column beyond ref_col
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        # if recursive:
        #     raise NotImplementedError("IndentedBlock with recursive is not implemented")
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; nested blocks overwrite this
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Build a block expression anchored at the column of the first
        non-whitespace character at ``loc`` and parse with it."""
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        # wrap the repeated lines in a Group only when grouping was requested
        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )
class AtStringStart(ParseElementEnhance):
    """Matches the given expression only when the parse location is the
    very beginning of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the contained expression when ``loc`` is 0;
        otherwise raise ParseException."""
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")
class AtLineStart(ParseElementEnhance):
    r"""Matches the given expression only when the parse location is in
    column 1, i.e. at the beginning of a line within the parse string

    Example:

    .. testcode::

        test = '''\
        BBB this line
        BBB and this line
          BBB but not this one
        A BBB and definitely not this one
        '''

        for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['BBB', ' this line']
        ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the contained expression when ``loc`` falls in
        column 1; otherwise raise ParseException."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` only checks that the given expression matches at the
    current position; it does *not* advance the parsing position within
    the input string, and it always returns a null token list. If any
    results names are defined in the lookahead expression, those *will*
    be returned for access by name.

    Example:

    .. testcode::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(
            label + Suppress(':')
            + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
        )

        attr_expr[1, ...].parse_string(
            "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the lookahead expression at ``loc`` and return the
        unchanged ``loc`` with the list contents of the results removed."""
        # parse with the contained expression, then empty the list part of the
        # returned ParseResults; named results defined inside the lookahead
        # expression are kept
        _, lookahead_results = self.expr._parse(instring, loc, do_actions=do_actions)
        del lookahead_results[:]

        return loc, lookahead_results
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` only checks that the given expression matches just
    before the current position; it does not advance the parsing position
    within the input string. ``PrecededBy`` always returns a null token
    list, but if a results name is defined on the given expression, it is
    returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False

        # derive a fixed lookbehind distance from the kind of expression given;
        # anything else uses the caller's retreat as a maximum window
        fixed_width = True
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            fixed_width = False
        self.exact = fixed_width
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the token list of the results
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Check that the contained expression matches immediately before
        ``loc``; return the unchanged ``loc`` with the lookbehind results."""
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        window_len = len(window)
        last_error: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )

        # NOTE(review): when loc > self.retreat the final offset is one more
        # than len(window), so the start index becomes -1 - confirm intended
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored_expr._parse(window, window_len - offset)
            except ParseBaseException as pbe:
                last_error = pbe
            else:
                break
        else:
            # no offset produced a match
            raise last_error

        return loc, ret
class Located(ParseElementEnhance):
    """
    Wraps the tokens of the contained expression together with the
    locations in the input string where the match started and ended.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression and return
        ``[start, tokens, end]`` with the three results names attached."""
        begin = loc
        end, tokens = self.expr._parse(instring, begin, do_actions, callPreParse=False)
        located = ParseResults([begin, tokens, end])
        located["locn_start"] = begin
        located["value"] = tokens
        located["locn_end"] = end
        if self.resultsName:
            # must return as a list, so that the name will be attached to the complete group
            return end, [located]
        return end, located
class NotAny(ParseElementEnhance):
    """
    Negative lookahead: succeeds only when the given parse expression does
    *not* match at the current position. ``NotAny`` does *not* advance the
    parsing position within the input string, and does *not* skip over
    leading whitespace. ``NotAny`` always returns a null token list.
    May be constructed using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # do NOT use self.leave_whitespace(), don't want to propagate to exprs
        # self.leave_whitespace()
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return ``(loc, [])`` unless the contained expression can parse
        at ``loc``, in which case raise ParseException."""
        unwanted_found = self.expr.can_parse_next(instring, loc, do_actions=do_actions)
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        return f"~{{{self.expr}}}"
class _MultipleMatch(ParseElementEnhance):
    # Shared implementation of repetition with an optional stop sentinel;
    # the base class of OneOrMore and ZeroOrMore.

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pull the legacy "stopOn" keyword out of kwargs, if it was given
        legacy_stop_on: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr)
        sentinel = legacy_stop_on or stop_on
        self.saveAsList = True
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stop_on(self, ender) -> ParserElement:
        """Set the sentinel expression that ends the repetition; ``None``
        clears it. Returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    stopOn = stop_on

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expression once (any failure propagates),
        then keep matching until it fails or the sentinel is found."""
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_ender = None
        if self.not_ender is not None:
            reject_ender = self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_one(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_one(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # a failed further match just ends the repetition
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for e in [self.expr] + self.expr.recurse():
                if not (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and diag not in e.suppress_warnings_
                ):
                    continue
                # warn once, for the first colliding contained expression
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
                break

        return super()._setResultsName(name, list_all_matches)
class OneOrMore(_MultipleMatch):
    """
    Matches the given expression one or more times in succession.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word).set_parse_action(' '.join))

        >>> text = "shape: SQUARE posn: upper left color: BLACK"

        # Fail! read 'posn' as data instead of next label
        >>> attr_expr[1, ...].parse_string(text).pprint()
        [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore
        # to avoid reading label string as part of the data
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(
        ...         data_word, stop_on=label).set_parse_action(' '.join))
        >>> OneOrMore(attr_expr).parse_string(text).pprint()  # Better
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        >>> (attr_expr * (1,)).parse_string(text).pprint()
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        inner = self.expr
        return f"{{{inner}}}..."
class ZeroOrMore(_MultipleMatch):
    """
    Matches the given expression zero or more times in succession.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pull the legacy "stopOn" keyword out of kwargs, if it was given
        legacy_stop_on: Union[ParserElement, str] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr, stop_on=legacy_stop_on or stop_on)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse as a repetition; when not even one match is found, return
        the unchanged ``loc`` with empty results."""
        try:
            matched = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            return loc, ParseResults([], name=self.resultsName)
        return matched

    def _generateDefaultName(self) -> str:
        inner = self.expr
        return f"[{inner}]..."
class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. With
    ``combine=True`` the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens
    are returned as a list of tokens, with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example:

    .. doctest::

        >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
        ParseResults(['aa', 'bb', 'cc'], {})
        >>> DelimitedList(Word(hexnums), delim=':', combine=True
        ... ).parse_string("AA:BB:CC:DD:EE")
        ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the requested list-length bounds
        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        # delimiters are suppressed from the results unless combining
        if combine or isinstance(delim, Suppress):
            self.delim = delim
        else:
            self.delim = Suppress(delim)
        self.combine = combine
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one leading item, then (delim + item) repeated within the bounds
        extra_min = self.min - 1
        extra_max = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            extra_min,
            extra_max,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        item = self.content.streamline()
        return f"{item} [{self.raw_delim} {item}]..."
5871class _NullToken:
5872 def __bool__(self):
5873 return False
5875 def __str__(self):
5876 return ""
class Opt(ParseElementEnhance):
    """
    Matches the given expression if it is present, and still succeeds
    if it is not.

    :param expr: expression that may be matched, zero or one time
    :param default: (optional) - value to be returned
        if the optional expression is not found.

    Example:

    .. testcode::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
        98765-
             ^
        ParseException: Expected end of text, found '-' (at char 5), (line:1, col:6)
        FAIL: Expected end of text, found '-' (at char 5), (line:1, col:6)
    """

    # sentinel marking "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression; if it does not match, return the
        unchanged ``loc`` with the default value (if one was given) or an
        empty token list."""
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                # expose the default under the inner expression's results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return f"[{inner}]"


# alternate name for Opt
Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param other: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
       (the skipped text and target expression are returned
       as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
       (typically quoted strings and comments)
       that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
       are not allowed to be included in the skipped text;
       if found before the target expression is found,
       the :class:`SkipTo` is not a match

    Example:

    .. testcode::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints:

    .. testoutput::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # legacy camelCase spelling of ``fail_on``, pulled out of **kwargs
        # NOTE(review): deprecate_argument lives in .util; presumably it also
        # emits a deprecation warning - confirm there
        failOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "failOn", None
        )

        super().__init__(other)
        # a truthy legacy value takes precedence over ``fail_on``
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            # plain strings are promoted to the configured literal expression class
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # private helper expression that only carries the ignore expressions;
        # parseImpl uses it to step over ignorable text while scanning
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild ``self.ignorer`` from the target's ignore expressions plus ``ignoreExpr``."""
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.

        Returns ``self`` so that calls can be chained, in the same way as
        :meth:`Combine.ignore`.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns ``(new_loc, results)`` where ``results`` holds the skipped text,
        followed by the target's own tokens when ``include`` was requested.

        Raises ``ParseException`` when the end of the input is reached without
        finding the target, or when the ``fail_on`` expression matches at a
        location where the target does not.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                if self_failOn_canParseNext(instring, tmploc):
                    # fail_on was reached before the target: this is not a match,
                    # unless the target itself also matches right here (in which
                    # case nothing forbidden ends up in the skipped text)
                    try:
                        self_expr_parse(
                            instring, tmploc, do_actions=False, callPreParse=False
                        )
                    except (ParseException, IndexError):
                        raise ParseException(
                            instring, loc, self.errmsg, self
                        ) from None
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honoring do_actions
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    instance using the ``'<<'`` operator.

    .. Note::

       Take care when assigning to ``Forward`` not to overlook
       precedence of operators.

       Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

           fwd_expr << a | b | c

       will actually be evaluated as::

           (fwd_expr << a) | b | c

       thereby leaving b and c out as parseable alternatives.
       It is recommended that you explicitly group the values
       inserted into the :class:`Forward`::

           fwd_expr << (a | b | c)

       Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :meth:`ParseResults.pprint` for an example of a recursive
    parser created using :class:`Forward`.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # With limit=2 the extracted stack is [caller, this __init__]; entry 0 is
        # the code that created the Forward.  __del__ reports this location when
        # no expression is ever attached.
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry recorded by __lshift__; compared against in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and return ``self``.

        Strings are converted with ``_literalStringClass``; any other
        non-``ParserElement`` operand yields ``NotImplemented``.  Several
        attributes of ``other`` (whitespace settings, ``mayIndexError``,
        ``saveAsList``, ignore expressions, ...) are copied onto this element.
        """
        # the creation frame is dropped once an attachment is attempted
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # stack entry of whoever called __lshift__ (when reached through
        # __ilshift__ below, that is the __ilshift__ frame)
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """Implement ``fwd <<= other`` by delegating to ``<<`` for ``ParserElement`` operands."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """Combine with ``other`` using the base class ``|``.

        Emits a diagnostic warning first when the caller's stack entry equals the
        one recorded by ``<<`` and the corresponding diagnostic is enabled and not
        suppressed on this element.
        """
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                PyparsingDiagnosticWarning,
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # report the warning at the location where this Forward was created
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse via the contained expression, with optional left-recursion support.

        Warns (if enabled) when no expression has been attached.  When
        ``ParserElement._left_recursion_enabled`` is false this simply defers to
        the base class; otherwise the bounded-recursion algorithm described in
        the comments below is used.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                # none of the entry points found in the inspected frames
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                PyparsingDiagnosticWarning,
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                # a memoized failure is stored as the exception object itself
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                # seed: a failure "ending" before loc, so any real match is an improvement
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            # remember the failure for both keys before propagating it
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Overrides ``leave_whitespace`` defined in base class: clears
        ``skipWhitespace`` on this element only and returns ``self``.
        The ``recursive`` argument is accepted but not used here.
        """
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Overrides ``ignore_whitespace`` defined in base class: sets
        ``skipWhitespace`` on this element only and returns ``self``.
        The ``recursive`` argument is accepted but not used here.
        """
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the contained expression once; returns ``self``."""
        if not self.streamlined:
            # set the flag before descending, so a self-referencing grammar
            # that reaches this element again does not recurse further
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; always emits a ``PyparsingDeprecationWarning`` before validating."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        # only descend if this element has not been visited on the current path
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<class name>: <contained expression>"``, truncating the expression text to 1000 characters."""
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.

        When no expression has been attached yet, the result is a new
        ``Forward`` that wraps this instance (via ``<<=``) instead of a base
        class copy.
        """
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Delegate to the base class, warning first (if enabled) when no expression is attached."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # NOTE(review): replaced_by_pep8 is imported from .util; presumably it wraps
    # the snake_case method under the legacy camelCase name - confirm there
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class TokenConverter(ParseElementEnhance):
    """
    Abstract base for :class:`ParseElementEnhance` subclasses that convert
    the parsed results of their contained expression.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted for signature compatibility but is not
        # forwarded to the base initializer
        super().__init__(expr)
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter that joins all matched tokens into a single string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example:

    .. doctest::

        >>> real = Word(nums) + '.' + Word(nums)
        >>> print(real.parse_string('3.1416'))
        ['3', '.', '1416']

        >>> # will also erroneously match the following
        >>> print(real.parse_string('3. 1416'))
        ['3', '.', '1416']

        >>> real = Combine(Word(nums) + '.' + Word(nums))
        >>> print(real.parse_string('3.1416'))
        ['3.1416']

        >>> # no match when there are internal spaces
        >>> print(real.parse_string('3. 1416'))
        Traceback (most recent call last):
        ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # the legacy keyword wins whenever it was supplied
        if joinString is None:
            joinString = join_string
        # whitespace skipping is switched off for the contained expressions
        # when tokens must be adjacent, then switched back on for the
        # Combine element itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """
        Register an expression (e.g., comments) to be ignored while matching.
        May be called repeatedly to register several ignorable patterns.
        Returns ``self``.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            # adjacent mode calls ParserElement.ignore directly, bypassing
            # the intermediate base-class override
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Collapse the matched tokens into one joined string token."""
        # start from a copy so the named results are kept, then empty its token list
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
class Group(TokenConverter):
    """Converter that returns the matched tokens nested as one list - handy for
    the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    When the optional ``aslist`` argument is True, the parsed tokens are
    returned as a Python list rather than as a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> ident = Word(alphas)
        >>> num = Word(nums)
        >>> term = ident | num
        >>> func = ident + Opt(DelimitedList(term))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', 'a', 'b', '100']

        >>> func = ident + Group(Opt(DelimitedList(term)))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the tokens in a list, or convert them to a ``ParseResults.List``."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.as_list()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and also
    as a dictionary. Every element can be looked up using the first token
    of the expression as its key. Useful for tabular report scraping when
    the first column can serve as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict rather than as a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')

        >>> attr_expr = (
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join)
        ... )

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> # print attributes as plain groups
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
        # Dict will auto-assign names.
        >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        # access named fields as dict entries, or output as dict
        >>> print(result['shape'])
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register every non-empty token group under the key of its first item."""
        for i, tok in enumerate(tokenlist):
            n_items = len(tok)
            if n_items == 0:
                continue

            ikey = tok[0]
            if isinstance(ikey, int):
                ikey = str(ikey).strip()

            if n_items == 1:
                # key only, no value tokens
                value = ""
            elif n_items == 2 and not isinstance(tok[1], ParseResults):
                # key plus one plain value
                value = tok[1]
            else:
                try:
                    remainder = tok.copy()  # ParseResults(i)
                except Exception:
                    exc = TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    )
                    raise exc from None

                del remainder[0]

                # keep the whole remainder unless it is exactly one unnamed item
                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[ikey] = _ParseResultsWithOffset(value, i)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example:

    .. doctest::

        >>> source = "a, b, c,d"
        >>> wd = Word(alphas)
        >>> wd_list1 = wd + (',' + wd)[...]
        >>> print(wd_list1.parse_string(source))
        ['a', ',', 'b', ',', 'c', ',', 'd']

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        >>> wd_list2 = wd + (Suppress(',') + wd)[...]
        >>> print(wd_list2.parse_string(source))
        ['a', 'b', 'c', 'd']

        # Skipped text (using '...') can be suppressed as well
        >>> source = "lead in START relevant text END trailing text"
        >>> start_marker = Keyword("START")
        >>> end_marker = Keyword("END")
        >>> find_body = Suppress(...) + start_marker + ... + end_marker
        >>> print(find_body.parse_string(source))
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` is a placeholder: the expression to skip to is only
        # known once something is added or subtracted on the right
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> ParserElement:
        """``+`` operator; a pending ``Suppress(...)`` becomes a suppressed skip to ``other``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        """``-`` operator; a pending ``Suppress(...)`` becomes a suppressed skip to ``other``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop all tokens produced by the contained expression."""
        return []

    def suppress(self) -> ParserElement:
        """Return ``self``, since this element is already suppressed."""
        return self
6690# XXX: Example needs to be re-done for updated output
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator that traces calls to a parse action on ``sys.stderr``.

    On entry the decorator writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``.
    On exit it writes ``"<<leaving method-name"`` followed by the returned
    value, or by the type and message of the exception the parse action
    raised (the exception is then re-raised).

    Example:

    .. testsetup:: stderr

       import sys
       sys.stderr = sys.stdout

    .. testcleanup:: stderr

       sys.stderr = sys.__stderr__

    .. testcode:: stderr

       wd = Word(alphas)

       @trace_parse_action
       def remove_duplicate_chars(tokens):
           return ''.join(sorted(set(''.join(tokens))))

       wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
       print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
       :options: +NORMALIZE_WHITESPACE

       >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
         0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
       <<leaving remove_duplicate_chars (ret: 'dfjkls')
       ['dfjkls']

    .. versionchanged:: 3.1.0
       Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        source, position, toks = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owner; prefix its type name
            label = f"{type(paArgs[0]).__name__}.{label}"
        sys.stderr.write(
            f">>entering {label}(line: {line(position, source)!r}, {position}, {toks!r})\n"
        )
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {outcome!r})\n")
            return outcome

    z.__name__ = f.__name__
    return z
# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Mini-grammar used by srange() to parse a regex-style ``[...]`` character set.

# backslash + one punctuation character -> that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# backslash + optional 0 + x/X + hex digits -> the character with that code point.
# The digits are everything after the x/X marker.  (str.lstrip(r"\0x") is not
# suitable here: it strips a *set* of characters, so it would also eat leading
# zero digits - leaving nothing for "\x00" - and it never removes an uppercase X.)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(re.split("[xX]", t[0], maxsplit=1)[1], 16))
)
# backslash + 0 + octal digits -> the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one set member: an escape sequence, or any single char other than backslash or ]
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped so srange() can tell it apart from a single char
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
def srange(s: str) -> str:
    r"""Helper for defining character-set strings for :class:`Word`
    construction, using the syntax of regexp ``'[]'`` ranges::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s; the result is the expanded
    set of characters joined into one string. An empty string is returned
    if the input cannot be parsed or expanded. The values enclosed in the
    []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def _expanded(part):
        # plain entries pass through; grouped (low, high) pairs expand inclusively
        if not isinstance(part, ParseResults):
            yield part
            return
        low, high = ord(part[0]), ord(part[1])
        for code in range(low, high + 1):
            yield chr(code)

    try:
        pieces = []
        for part in _reBracketExpr.parse_string(s).body:
            pieces.extend(_expanded(part))
        return "".join(pieces)
    except Exception:
        # best effort: any failure yields an empty character set
        return ""
def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying a function to every
    element of a :class:`ParseResults` list. Any additional args are passed
    on to the function after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # callables without a __name__ are labelled with their class name instead
    fallback_name = func.__class__.__name__

    def pa(s, l, t):
        return [func(item, *args) for item in t]

    pa.__name__ = getattr(func, "__name__", fallback_name)
    return pa
def autoname_elements() -> None:
    """
    Utility for mass-naming parser elements: every local variable of the
    caller that is a ParserElement without a custom name is named after
    its variable. Intended for generating railroad diagrams with named
    subdiagrams.
    """

    # sys._getframe may not be implemented in the current Python
    frame_getter = getattr(sys, "_getframe", None)
    caller = frame_getter(1) if frame_getter is not None else None
    if caller is None:
        return

    # name each still-unnamed ParserElement after the variable holding it
    caller = typing.cast(types.FrameType, caller)
    for var_name, candidate in caller.f_locals.items():
        if not isinstance(candidate, ParserElement):
            continue
        if candidate.customName:
            continue
        candidate.set_name(var_name)
# Single-line string in double quotes.  The body may contain any character
# other than a double quote, newline, carriage return or backslash; a doubled
# quote (""); or a backslash escape (where \x must be followed by hex digits).
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Same pattern as above, using single quotes.
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above; built from its own Regex objects rather than
# from dbl_quoted_string / sgl_quoted_string.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Python-style string literals: triple-quoted forms first, then single-line forms
# in which an embedded quote is written with a backslash (\" or \') instead of
# being doubled.
# NOTE(review): the alternatives are joined with '^', presumably pyparsing's
# longest-match Or operator - defined on ParserElement outside this chunk.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted string (copied from quoted_string) prefixed with a literal "u"
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets expanded by srange() at import time:
# code points 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# code points 0xa1-0xbf, plus 0xd7 and 0xf7
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement bound in this module's namespace at this point;
# names assigned after this statement are not included)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# Compatibility synonyms
# The camelCase expression names below are bound to the very same objects as
# their snake_case counterparts.
# NOTE(review): the function synonyms go through replaced_by_pep8 (from .util),
# presumably a wrapper that forwards to the snake_case function - confirm there.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on