Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 45%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# core.py
3#
4from __future__ import annotations
6import collections.abc
7from collections import deque
8import os
9import typing
10from typing import (
11 Any,
12 Callable,
13 Generator,
14 NamedTuple,
15 Sequence,
16 TextIO,
17 Union,
18 cast,
19)
20from abc import ABC, abstractmethod
21from enum import Enum
22import string
23import copy
24import warnings
25import re
26import sys
27from collections.abc import Iterable
28import traceback
29import types
30from operator import itemgetter
31from functools import wraps
32from threading import RLock
33from pathlib import Path
35from .util import (
36 _FifoCache,
37 _UnboundedCache,
38 __config_flags,
39 _collapse_string_to_ranges,
40 _escape_regex_range_chars,
41 _flatten,
42 LRUMemo as _LRUMemo,
43 UnboundedMemo as _UnboundedMemo,
44 deprecate_argument,
45 replaced_by_pep8,
46)
47from .exceptions import *
48from .actions import *
49from .results import ParseResults, _ParseResultsWithOffset
50from .unicode import pyparsing_unicode
# Largest native index value (``sys.maxsize``).
# NOTE(review): presumably used elsewhere in the module as an "unbounded" limit - confirm.
_MAX_INT = sys.maxsize
# The two built-in string-like types, as a tuple.
# NOTE(review): presumably intended for isinstance() checks - confirm against callers.
str_type: tuple[type, ...] = (str, bytes)
55#
56# Copyright (c) 2003-2022 Paul T. McGuire
57#
58# Permission is hereby granted, free of charge, to any person obtaining
59# a copy of this software and associated documentation files (the
60# "Software"), to deal in the Software without restriction, including
61# without limitation the rights to use, copy, modify, merge, publish,
62# distribute, sublicense, and/or sell copies of the Software, and to
63# permit persons to whom the Software is furnished to do so, subject to
64# the following conditions:
65#
66# The above copyright notice and this permission notice shall be
67# included in all copies or substantial portions of the Software.
68#
69# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
70# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
71# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
72# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
73# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
74# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
75# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
76#
78from functools import cached_property
class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # Label for this group of flags.
    # NOTE(review): presumably consumed by the __config_flags base class - confirm.
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # Snapshot of every public (non-underscore) name defined in the class body
    # up to this point; it must stay below the flag definitions above.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # Whitespace-separated list of flag names.
    # NOTE(review): presumably flags the base class refuses to change - confirm.
    _fixed_names = """
        collect_all_And_tokens
        """.split()
class __diag__(__config_flags):
    """
    Holder for the global diagnostic flags; every flag defaults to ``False``.

    The flag names mirror the members of the :class:`Diagnostics` enum, which
    is how :func:`enable_diag` and :func:`disable_diag` address them.
    """

    # Label for this group of flags.
    # NOTE(review): presumably consumed by the __config_flags base class - confirm.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # Snapshot of every public (non-underscore) name defined in the class body
    # up to this point; it must stay below the flag definitions above.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # Flags partitioned by naming convention: "warn*" versus "enable_debug*".
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag whose name starts with ``warn``."""
        for name in cls._warning_names:
            cls.enable(name)
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # Each member's name is used as the flag name passed to __diag__.enable()
    # and __diag__.disable().
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    # NOTE(review): this member is not described in the docstring above; presumably it
    # warns when '<<' is combined with an unparenthesized '|' expression - confirm.
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on the global pyparsing diagnostic flag named by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off the global pyparsing diagnostic flag named by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
def enable_all_warnings() -> None:
    """
    Switch on every global pyparsing diagnostic warning (see :class:`Diagnostics`).

    Delegates to the ``enable_all_warnings`` classmethod of the diagnostics
    flag holder.
    """
    enable_every_warning = __diag__.enable_all_warnings
    enable_every_warning()
# hide abstract class
# (__compat__ and __diag__ above have already subclassed it, so the module-level
# name is no longer needed)
del __config_flags
def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether all pyparsing warnings should be enabled at import time.

    The starting answer is the truthiness of ``warn_env_var``. Each entry of
    ``cmd_line_warn_options`` (in ``-W`` format,
    ``action:message:category:module:line``) is then applied in order, so a
    later option can override an earlier one:

    - a non-"ignore" action that is either completely unqualified or targets
      the ``pyparsing`` module enables warnings;
    - an "ignore" action with no module, or targeting ``pyparsing``, disables
      them.

    :param cmd_line_warn_options: warning option strings, e.g. ``sys.warnoptions``
    :param warn_env_var: value of the enabling environment variable, or ``None``
    :return: ``True`` if warnings should be enabled
    """
    decision = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with colons so that short options still provide every field
        fields = (option + "::::").split(":")
        action, message, category, module = fields[:4]

        if action.lower().startswith("i"):
            if module in ("pyparsing", ""):
                decision = False
            continue

        unqualified = not (message or category or module)
        if unqualified or module == "pyparsing":
            decision = True
    return decision
# At import time, enable all pyparsing warnings when requested either through the
# interpreter's -W options or the PYPARSINGENABLEALLWARNINGS environment variable.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()
# build list of single arg builtins, that can be used as parse actions
# (_trim_arity wraps any member of this set so it is called with the tokens only)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on
_generatorType = types.GeneratorType
# Return annotation used by ParserElement.parseImpl: an int location plus a value.
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# Predicate signatures accepted as parse conditions: zero to three positional
# arguments (a trailing subset of (s, loc, toks)), returning bool.
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# Fail action signature: fn(s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# Debug callback signatures; the trailing bool matches the ``cache_hit`` argument
# of the default debug actions.
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]
# Convenience character-set strings.
# ASCII letters, uppercase first
alphas: str = string.ascii_uppercase + string.ascii_lowercase
# identifier start / body characters, taken from the Latin1 unicode set
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
# decimal digits
nums: str = "0123456789"
# hexadecimal digits in both letter cases
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])
class _ParseActionIndexError(Exception):
    """
    Internal carrier for an ``IndexError`` that escaped from a user parse
    action.

    Wrapping keeps such errors distinguishable from ``IndexError`` raised
    inside ParserElement ``parseImpl`` methods, which are handled differently.
    """

    def __init__(self, msg: str, exc: BaseException) -> None:
        # the original exception raised by the parse action
        self.exc: BaseException = exc
        # short description of where the error came from
        self.msg: str = msg
# Module-level caches filled in by the first call to _trim_arity():
# - the stack frame summary captured inside _trim_arity
# - the (filename, line number) pair synthesized for the parse-action call site
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()
def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is always called with the full argument list and
    forwards ``args[limit:]`` to ``func``. ``limit`` starts at 0 and is
    increased by one, up to ``max_limit``, each time the call raises a
    ``TypeError`` whose extracted traceback frame matches the synthesized
    (filename, line number) of the call inside the wrapper. The first call
    that completes fixes the arity, and later calls go straight to ``func``.

    Members of ``_single_arg_builtins`` are special-cased and wrapped so they
    receive only the tokens argument.

    An ``IndexError`` raised by ``func`` is re-raised as
    ``_ParseActionIndexError``. Any other ``TypeError`` is re-raised unchanged.

    :param func: the callable to adapt
    :param max_limit: largest number of leading arguments that may be dropped
    :return: the adapting wrapper, with ``__name__`` and ``__doc__`` copied from ``func``
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a simple ``True``/``False`` predicate so that it can be used as a
    parse action. Useful where a parse action is required and
    :meth:`ParserElement.add_condition` cannot be used (such as when adding a
    condition to an operator level in :class:`infix_notation`).

    The returned parse action raises when the predicate returns a falsy value
    and otherwise returns ``None``.

    Optional keyword arguments:

    :param message: define a custom message to be used in the raised exception
    :param fatal: if ``True``, will raise :class:`ParseFatalException`
                  to stop parsing immediately;
                  otherwise will raise :class:`ParseException`

    """
    if message is None:
        failure_text = "failed user-defined condition"
    else:
        failure_text = message

    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException

    # let the predicate accept anywhere from zero to three arguments
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if bool(fn(s, l, t)):
            return
        raise failure_type(s, l, failure_text)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default debug hook printed when a match attempt starts: reports the
    expression and location, followed by the current input line with a caret
    under the column being tried. A leading ``*`` marks a cache hit.
    """
    marker = "*" if cache_hit else ""
    row = lineno(loc, instring)
    column = col(loc, instring)
    text_line = line(loc, instring)
    report = (
        f"{marker}Match {expr} at loc {loc}({row},{column})\n"
        f" {text_line}\n"
        f" {'^':>{column}}"
    )
    print(report)
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default debug hook printed after a successful match: reports the
    expression and the matched tokens as a list. A leading ``*`` marks a
    cache hit.
    """
    marker = "*" if cache_hit else ""
    matched = toks.as_list()
    print(f"{marker}Matched {expr} -> {matched}")
def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default debug hook printed after a failed match: reports the expression,
    the exception's class name and its message. A leading ``*`` marks a
    cache hit.
    """
    marker = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print(f"{marker}Match {expr} failed, {exc_name} raised: {exc}")
def null_debug_action(*args):
    """No-op debug action: accepts any arguments and does nothing, so that
    debugging output can be suppressed during parsing."""
    return None
391class ParserElement(ABC):
392 """Abstract base level parser element class."""
394 DEFAULT_WHITE_CHARS: str = " \n\t\r"
395 verbose_stacktrace: bool = False
396 _literalStringClass: type = None # type: ignore[assignment]
398 @staticmethod
399 def set_default_whitespace_chars(chars: str) -> None:
400 r"""
401 Overrides the default whitespace chars
403 Example:
405 .. doctest::
407 # default whitespace chars are space, <TAB> and newline
408 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
409 ParseResults(['abc', 'def', 'ghi', 'jkl'], {})
411 # change to just treat newline as significant
412 >>> ParserElement.set_default_whitespace_chars(" \t")
413 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
414 ParseResults(['abc', 'def'], {})
416 # Reset to default
417 >>> ParserElement.set_default_whitespace_chars(" \n\t\r")
418 """
419 ParserElement.DEFAULT_WHITE_CHARS = chars
421 # update whitespace all parse expressions defined in this module
422 for expr in _builtin_exprs:
423 if expr.copyDefaultWhiteChars:
424 expr.whiteChars = set(chars)
426 @staticmethod
427 def inline_literals_using(cls: type) -> None:
428 """
429 Set class to be used for inclusion of string literals into a parser.
431 Example:
433 .. doctest::
434 :options: +NORMALIZE_WHITESPACE
436 # default literal class used is Literal
437 >>> integer = Word(nums)
438 >>> date_str = (
439 ... integer("year") + '/'
440 ... + integer("month") + '/'
441 ... + integer("day")
442 ... )
444 >>> date_str.parse_string("1999/12/31")
445 ParseResults(['1999', '/', '12', '/', '31'],
446 {'year': '1999', 'month': '12', 'day': '31'})
448 # change to Suppress
449 >>> ParserElement.inline_literals_using(Suppress)
450 >>> date_str = (
451 ... integer("year") + '/'
452 ... + integer("month") + '/'
453 ... + integer("day")
454 ... )
456 >>> date_str.parse_string("1999/12/31")
457 ParseResults(['1999', '12', '31'],
458 {'year': '1999', 'month': '12', 'day': '31'})
460 # Reset
461 >>> ParserElement.inline_literals_using(Literal)
462 """
463 ParserElement._literalStringClass = cls
465 @classmethod
466 def using_each(cls, seq, **class_kwargs):
467 """
468 Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq.
470 Example:
472 .. testcode::
474 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")
476 .. versionadded:: 3.1.0
477 """
478 yield from (cls(obj, **class_kwargs) for obj in seq)
    class DebugActions(NamedTuple):
        """Triple of optional debug callbacks attached to an expression."""

        # called before a match is attempted
        debug_try: typing.Optional[DebugStartAction]
        # called after a successful match
        debug_match: typing.Optional[DebugSuccessAction]
        # called when the match attempt raised an exception
        debug_fail: typing.Optional[DebugExceptionAction]
485 def __init__(self, savelist: bool = False) -> None:
486 self.parseAction: list[ParseAction] = list()
487 self.failAction: typing.Optional[ParseFailAction] = None
488 self.customName: str = None # type: ignore[assignment]
489 self._defaultName: typing.Optional[str] = None
490 self.resultsName: str = None # type: ignore[assignment]
491 self.saveAsList: bool = savelist
492 self.skipWhitespace: bool = True
493 self.whiteChars: set[str] = set(ParserElement.DEFAULT_WHITE_CHARS)
494 self.copyDefaultWhiteChars: bool = True
495 # used when checking for left-recursion
496 self._may_return_empty: bool = False
497 self.keepTabs: bool = False
498 self.ignoreExprs: list[ParserElement] = list()
499 self.debug: bool = False
500 self.streamlined: bool = False
501 # optimize exception handling for subclasses that don't advance parse index
502 self.mayIndexError: bool = True
503 self.errmsg: Union[str, None] = ""
504 # mark results names as modal (report only last) or cumulative (list all)
505 self.modalResults: bool = True
506 # custom debug actions
507 self.debugActions = self.DebugActions(None, None, None)
508 # avoid redundant calls to preParse
509 self.callPreparse: bool = True
510 self.callDuringTry: bool = False
511 self.suppress_warnings_: list[Diagnostics] = []
512 self.show_in_diagram: bool = True
514 @property
515 def mayReturnEmpty(self) -> bool:
516 """
517 .. deprecated:: 3.3.0
518 use _may_return_empty instead.
519 """
520 return self._may_return_empty
522 @mayReturnEmpty.setter
523 def mayReturnEmpty(self, value) -> None:
524 """
525 .. deprecated:: 3.3.0
526 use _may_return_empty instead.
527 """
528 self._may_return_empty = value
530 def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
531 """
532 Suppress warnings emitted for a particular diagnostic on this expression.
534 Example:
536 .. doctest::
538 >>> label = pp.Word(pp.alphas)
540 # Normally using an empty Forward in a grammar
541 # would print a warning, but we can suppress that
542 >>> base = pp.Forward().suppress_warning(
543 ... pp.Diagnostics.warn_on_parse_using_empty_Forward)
545 >>> grammar = base | label
546 >>> print(grammar.parse_string("x"))
547 ['x']
548 """
549 self.suppress_warnings_.append(warning_type)
550 return self
552 def visit_all(self):
553 """General-purpose method to yield all expressions and sub-expressions
554 in a grammar. Typically just for internal use.
555 """
556 to_visit = deque([self])
557 seen = set()
558 while to_visit:
559 cur = to_visit.popleft()
561 # guard against looping forever through recursive grammars
562 if cur in seen:
563 continue
564 seen.add(cur)
566 to_visit.extend(cur.recurse())
567 yield cur
569 def copy(self) -> ParserElement:
570 """
571 Make a copy of this :class:`ParserElement`. Useful for defining
572 different parse actions for the same parsing pattern, using copies of
573 the original parse element.
575 Example:
577 .. testcode::
579 integer = Word(nums).set_parse_action(
580 lambda toks: int(toks[0]))
581 integerK = integer.copy().add_parse_action(
582 lambda toks: toks[0] * 1024) + Suppress("K")
583 integerM = integer.copy().add_parse_action(
584 lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
586 print(
587 (integerK | integerM | integer)[1, ...].parse_string(
588 "5K 100 640K 256M")
589 )
591 prints:
593 .. testoutput::
595 [5120, 100, 655360, 268435456]
597 Equivalent form of ``expr.copy()`` is just ``expr()``:
599 .. testcode::
601 integerM = integer().add_parse_action(
602 lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
603 """
604 cpy = copy.copy(self)
605 cpy.parseAction = self.parseAction[:]
606 cpy.ignoreExprs = self.ignoreExprs[:]
607 if self.copyDefaultWhiteChars:
608 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
609 return cpy
611 def set_results_name(
612 self, name: str, list_all_matches: bool = False, **kwargs
613 ) -> ParserElement:
614 """
615 Define name for referencing matching tokens as a nested attribute
616 of the returned parse results.
618 Normally, results names are assigned as you would assign keys in a dict:
619 any existing value is overwritten by later values. If it is necessary to
620 keep all values captured for a particular results name, call ``set_results_name``
621 with ``list_all_matches`` = True.
623 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
624 this is so that the client can define a basic element, such as an
625 integer, and reference it in multiple places with different names.
627 You can also set results names using the abbreviated syntax,
628 ``expr("name")`` in place of ``expr.set_results_name("name")``
629 - see :meth:`__call__`. If ``list_all_matches`` is required, use
630 ``expr("name*")``.
632 Example:
634 .. testcode::
636 integer = Word(nums)
637 date_str = (integer.set_results_name("year") + '/'
638 + integer.set_results_name("month") + '/'
639 + integer.set_results_name("day"))
641 # equivalent form:
642 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
643 """
644 listAllMatches: bool = deprecate_argument(kwargs, "listAllMatches", False)
646 list_all_matches = listAllMatches or list_all_matches
647 return self._setResultsName(name, list_all_matches)
649 def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
650 if name is None:
651 return self
652 newself = self.copy()
653 if name.endswith("*"):
654 name = name[:-1]
655 list_all_matches = True
656 newself.resultsName = name
657 newself.modalResults = not list_all_matches
658 return newself
660 def set_break(self, break_flag: bool = True) -> ParserElement:
661 """
662 Method to invoke the Python pdb debugger when this element is
663 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
664 disable.
665 """
666 if break_flag:
667 _parseMethod = self._parse
669 def breaker(instring, loc, do_actions=True, callPreParse=True):
670 # this call to breakpoint() is intentional, not a checkin error
671 breakpoint()
672 return _parseMethod(instring, loc, do_actions, callPreParse)
674 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined]
675 self._parse = breaker # type: ignore [method-assign]
676 elif hasattr(self._parse, "_originalParseMethod"):
677 self._parse = self._parse._originalParseMethod # type: ignore [method-assign]
678 return self
680 def set_parse_action(
681 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
682 ) -> ParserElement:
683 """
684 Define one or more actions to perform when successfully matching parse element definition.
686 Parse actions can be called to perform data conversions, do extra validation,
687 update external data structures, or enhance or replace the parsed tokens.
688 Each parse action ``fn`` is a callable method with 0-3 arguments, called as
689 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
691 - ``s`` = the original string being parsed (see note below)
692 - ``loc`` = the location of the matching substring
693 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object
695 The parsed tokens are passed to the parse action as ParseResults. They can be
696 modified in place using list-style append, extend, and pop operations to update
697 the parsed list elements; and with dictionary-style item set and del operations
698 to add, update, or remove any named results. If the tokens are modified in place,
699 it is not necessary to return them with a return statement.
701 Parse actions can also completely replace the given tokens, with another ``ParseResults``
702 object, or with some entirely different object (common for parse actions that perform data
703 conversions). A convenient way to build a new parse result is to define the values
704 using a dict, and then create the return value using :class:`ParseResults.from_dict`.
706 If None is passed as the ``fn`` parse action, all previously added parse actions for this
707 expression are cleared.
709 Optional keyword arguments:
711 :param call_during_try: (default= ``False``) indicate if parse action
712 should be run during lookaheads and alternate
713 testing. For parse actions that have side
714 effects, it is important to only call the parse
715 action once it is determined that it is being
716 called as part of a successful parse.
717 For parse actions that perform additional
718 validation, then ``call_during_try`` should
719 be passed as True, so that the validation code
720 is included in the preliminary "try" parses.
722 .. Note::
723 The default parsing behavior is to expand tabs in the input string
724 before starting the parsing process.
725 See :meth:`parse_string` for more information on parsing strings
726 containing ``<TAB>`` s, and suggested methods to maintain a
727 consistent view of the parsed string, the parse location, and
728 line and column positions within the parsed string.
730 Example: Parse dates in the form ``YYYY/MM/DD``
731 -----------------------------------------------
733 Setup code:
735 .. testcode::
737 def convert_to_int(toks):
738 '''a parse action to convert toks from str to int
739 at parse time'''
740 return int(toks[0])
742 def is_valid_date(instring, loc, toks):
743 '''a parse action to verify that the date is a valid date'''
744 from datetime import date
745 year, month, day = toks[::2]
746 try:
747 date(year, month, day)
748 except ValueError:
749 raise ParseException(instring, loc, "invalid date given")
751 integer = Word(nums)
752 date_str = integer + '/' + integer + '/' + integer
754 # add parse actions
755 integer.set_parse_action(convert_to_int)
756 date_str.set_parse_action(is_valid_date)
758 Successful parse - note that integer fields are converted to ints:
760 .. testcode::
762 print(date_str.parse_string("1999/12/31"))
764 prints:
766 .. testoutput::
768 [1999, '/', 12, '/', 31]
770 Failure - invalid date:
772 .. testcode::
774 date_str.parse_string("1999/13/31")
776 prints:
778 .. testoutput::
780 Traceback (most recent call last):
781 ParseException: invalid date given, found '1999' ...
782 """
783 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)
785 if list(fns) == [None]:
786 self.parseAction.clear()
787 return self
789 if not all(callable(fn) for fn in fns):
790 raise TypeError("parse actions must be callable")
791 self.parseAction[:] = [_trim_arity(fn) for fn in fns]
792 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry
794 return self
796 def add_parse_action(
797 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
798 ) -> ParserElement:
799 """
800 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.
802 See examples in :class:`copy`.
803 """
804 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)
806 self.parseAction += [_trim_arity(fn) for fn in fns]
807 self.callDuringTry = self.callDuringTry or callDuringTry or call_during_try
808 return self
810 def add_condition(
811 self, *fns: ParseCondition, call_during_try: bool = False, **kwargs: Any
812 ) -> ParserElement:
813 """Add a boolean predicate function to expression's list of parse actions. See
814 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
815 functions passed to ``add_condition`` need to return boolean success/fail of the condition.
817 Optional keyword arguments:
819 - ``message`` = define a custom message to be used in the raised exception
820 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
821 ParseException
822 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
823 default=False
825 Example:
827 .. doctest::
828 :options: +NORMALIZE_WHITESPACE
830 >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
831 >>> year_int = integer.copy().add_condition(
832 ... lambda toks: toks[0] >= 2000,
833 ... message="Only support years 2000 and later")
834 >>> date_str = year_int + '/' + integer + '/' + integer
836 >>> result = date_str.parse_string("1999/12/31")
837 Traceback (most recent call last):
838 ParseException: Only support years 2000 and later...
839 """
840 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)
842 for fn in fns:
843 self.parseAction.append(
844 condition_as_parse_action(
845 fn,
846 message=str(kwargs.get("message")),
847 fatal=bool(kwargs.get("fatal", False)),
848 )
849 )
851 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry
852 return self
854 def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
855 """
856 Define action to perform if parsing fails at this expression.
857 Fail acton fn is a callable function that takes the arguments
858 ``fn(s, loc, expr, err)`` where:
860 - ``s`` = string being parsed
861 - ``loc`` = location where expression match was attempted and failed
862 - ``expr`` = the parse expression that failed
863 - ``err`` = the exception thrown
865 The function returns no value. It may throw :class:`ParseFatalException`
866 if it is desired to stop parsing immediately."""
867 self.failAction = fn
868 return self
870 def _skipIgnorables(self, instring: str, loc: int) -> int:
871 if not self.ignoreExprs:
872 return loc
873 exprsFound = True
874 ignore_expr_fns = [e._parse for e in self.ignoreExprs]
875 last_loc = loc
876 while exprsFound:
877 exprsFound = False
878 for ignore_fn in ignore_expr_fns:
879 try:
880 while 1:
881 loc, dummy = ignore_fn(instring, loc)
882 exprsFound = True
883 except ParseException:
884 pass
885 # check if all ignore exprs matched but didn't actually advance the parse location
886 if loc == last_loc:
887 break
888 last_loc = loc
889 return loc
891 def preParse(self, instring: str, loc: int) -> int:
892 if self.ignoreExprs:
893 loc = self._skipIgnorables(instring, loc)
895 if self.skipWhitespace:
896 instrlen = len(instring)
897 white_chars = self.whiteChars
898 while loc < instrlen and instring[loc] in white_chars:
899 loc += 1
901 return loc
903 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
904 return loc, []
906 def postParse(self, instring, loc, tokenlist):
907 return tokenlist
    # @profile
    def _parseNoCache(
        self, instring, loc, do_actions=True, callPreParse=True
    ) -> tuple[int, ParseResults]:
        """
        Core uncached parse step for this expression.

        Steps, in order:

        1. optionally run ``preParse`` (only when both ``callPreParse`` and
           ``self.callPreparse`` are true) to find the token start location;
        2. call ``parseImpl``; when ``mayIndexError`` is set, or the start
           location is at or past the end of the input, an ``IndexError`` is
           converted to a ``ParseException`` located at the end of the input;
        3. run ``postParse`` and wrap the tokens in a :class:`ParseResults`;
        4. run the parse actions when ``do_actions`` or ``self.callDuringTry``
           is true; a returned value that is neither ``None`` nor the current
           results object replaces the results.

        When ``self.debug`` or ``self.failAction`` is set, the debug callbacks
        and the fail action are invoked around these steps.

        :param instring: the string being parsed
        :param loc: location at which to start
        :param do_actions: whether parse actions should be run
        :param callPreParse: whether ``preParse`` may be called first
        :return: tuple of (location after the match, resulting ParseResults)
        """
        debugging = self.debug  # and do_actions)
        len_instring = len(instring)

        if debugging or self.failAction:
            # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
            # NOTE(review): if preParse() itself raises, tokens_start has not been
            # assigned yet when the except block below reads it - confirm whether
            # that can happen in practice.
            try:
                if callPreParse and self.callPreparse:
                    pre_loc = self.preParse(instring, loc)
                else:
                    pre_loc = loc
                tokens_start = pre_loc
                if self.debugActions.debug_try:
                    self.debugActions.debug_try(instring, tokens_start, self, False)
                if self.mayIndexError or pre_loc >= len_instring:
                    try:
                        loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                    except IndexError:
                        raise ParseException(instring, len_instring, self.errmsg, self)
                else:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except Exception as err:
                # print("Exception raised:", err)
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                if self.failAction:
                    self.failAction(instring, tokens_start, self, err)
                raise
        else:
            # fast path: same matching logic without the debug/fail-action hooks
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

        tokens = self.postParse(instring, loc, tokens)

        ret_tokens = ParseResults(
            tokens, self.resultsName, aslist=self.saveAsList, modal=self.modalResults
        )
        if self.parseAction and (do_actions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        try:
                            tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                        except IndexError as parse_action_exc:
                            exc = ParseException("exception raised in parse action")
                            raise exc from parse_action_exc

                        # a parse action that returns a new value replaces the results
                        if tokens is not None and tokens is not ret_tokens:
                            ret_tokens = ParseResults(
                                tokens,
                                self.resultsName,
                                aslist=self.saveAsList
                                and isinstance(tokens, (ParseResults, list)),
                                modal=self.modalResults,
                            )
                except Exception as err:
                    # print "Exception raised in user parse action:", err
                    if self.debugActions.debug_fail:
                        self.debugActions.debug_fail(
                            instring, tokens_start, self, err, False
                        )
                    raise
            else:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns a new value replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            aslist=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
        if debugging:
            # print("Matched", self, "->", ret_tokens.as_list())
            if self.debugActions.debug_match:
                self.debugActions.debug_match(
                    instring, tokens_start, loc, self, ret_tokens, False
                )

        return loc, ret_tokens
def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and report only where the match ended.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param raise_fatal: if true, a :class:`ParseFatalException` propagates unchanged;
        otherwise it is replaced by a :class:`ParseException` located at ``loc``
    :param do_actions: forwarded to ``_parse``
    :returns: the first item of the ``_parse`` result (the location after the match)
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            # report the fatal error as an ordinary ParseException for this expression
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return outcome[0]
def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    Delegates to :meth:`try_parse`; a :class:`ParseException` or ``IndexError``
    raised by the attempt is reported as ``False``.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True
# cache for left-recursion in Forward references
# - recursion_memos is emptied by reset_cache() and replaced with an
#   _UnboundedMemo or _LRUMemo instance by enable_left_recursion()
# - keys are (int, Forward, bool) tuples; values pair an int with either a
#   ParseResults or an Exception (see the annotation below)
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}
1040 class _CacheType(typing.Protocol):
1041 """
1042 Class to be used for packrat and left-recursion cacheing of results
1043 and exceptions.
1044 """
1046 not_in_cache: bool
1048 def get(self, *args) -> typing.Any: ...
1050 def set(self, *args) -> None: ...
1052 def clear(self) -> None: ...
class NullCache(dict):
    """
    Placeholder cache assigned to the ``packrat_cache`` class variable at
    class-definition time. Its ``get``, ``set`` and ``clear`` methods do nothing;
    calling ``enable_packrat()`` swaps it out for a working _CacheType instance.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        # nothing is ever stored, so there is nothing to look up
        return None

    def set(self, *args) -> None:
        # intentionally discards whatever is passed in
        return None

    def clear(self) -> None:
        # intentionally a no-op (does not call dict.clear)
        return None
# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
# - packrat_cache starts as a do-nothing NullCache; enable_packrat() installs
#   an _UnboundedCache or _FifoCache in its place
# - packrat_cache_lock guards cache lookups/updates and the enable/disable calls
# - packrat_cache_stats is [hit count, miss count], indexed by the HIT/MISS
#   constants in _parseCache and zeroed by reset_cache()
packrat_cache: _CacheType = NullCache()
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Memoizing wrapper around ``_parseNoCache``, installed as ``_parse`` by
    ``enable_packrat()``.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``. On a
    miss the expression is parsed normally and either the result or a
    traceback-free copy of the raised ``ParseBaseException`` is stored. On a hit
    the stored exception is re-raised, or a copy of the stored tokens is
    returned. Debug actions, when enabled, are invoked with ``cache_hit=True``;
    a ``TypeError`` from a debug action that does not accept that keyword is
    ignored.

    :returns: ``(end location, tokens)``, as for ``_parseNoCache``
    :raises ParseBaseException: freshly raised on a miss, or replayed from the cache
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)

        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            # stored as (end location, copy of tokens, requested start location)
            cache.set(lookup, (value[0], value[1].copy(), loc))
            return value

        ParserElement.packrat_cache_stats[HIT] += 1
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(value, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise value

        value = cast(tuple[int, ParseResults, int], value)
        end_loc, result, start_loc = value[0], value[1].copy(), value[2]
        if self.debug and self.debugActions.debug_match:
            try:
                # success actions take (start, end): previously these two
                # arguments were passed in the opposite order on cache hits.
                # NOTE: start_loc is the location the parse was requested at,
                # i.e. before any preParse skipping.
                self.debugActions.debug_match(
                    instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return end_loc, result
# default parse entry point: no memoization. enable_packrat() rebinds this
# class attribute to _parseCache, and disable_memoization() restores it.
_parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache and the left-recursion memo table, and zero the
    packrat hit/miss counters.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        # overwrite in place so existing references to the stats list stay valid
        counters = ParserElement.packrat_cache_stats
        counters[:] = [0 for _ in counters]
        ParserElement.recursion_memos.clear()
# class attributes to keep caching status
# (set by enable_packrat() / enable_left_recursion(), both cleared by
# disable_memoization(); each enable call checks the other's flag)
_packratEnabled = False
_left_recursion_enabled = False
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing along with their memoization.

    Calling this when neither mode is active is harmless, so it can be used
    to clear any earlier settings before enabling Packrat or Left Recursion.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        cls.reset_cache()
        cls._left_recursion_enabled = False
        cls._packratEnabled = False
        # route parsing back through the uncached implementation
        cls._parse = cls._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, so that grammars may contain direct or
    indirect left-recursion. While parsing, a left-recursive :class:`Forward`
    is matched repeatedly with a fixed recursion depth that is raised step by
    step until the longest match has been found.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    Because matches of ``Forward`` elements are memoized as part of the search,
    parse actions may not be re-run during backtracking. Programs whose parse
    actions depend on a strict ordering of side-effects may therefore break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - upper bound on the number of
      ``Forward`` elements memoized during matching; ``None`` memoizes all of them.

    Bounded Recursion parsing is similar to, but not the same as, Packrat parsing,
    so the two are mutually exclusive. Pass ``force=True`` to switch off any
    previous, conflicting settings.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which memoizes the parsing logic. Grammars of
    any complexity tend to retry the same expression at the same string
    location many times; with packrat enabled those retries are answered from
    a cache instead of re-running the parsing/validating code. Both successful
    results and parsing exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the size of
      the packrat cache; ``None`` makes the cache unbounded; ``0`` effectively
      disables the cache.

    Parse actions with side-effects may behave differently once results are
    cached, which is why packrat parsing is off when pyparsing is first
    imported. A program opts in by calling the static method
    :class:`ParserElement.enable_packrat`, ideally immediately after importing
    pyparsing.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar to, but not the same as, Bounded Recursion parsing,
    so the two are mutually exclusive. Pass ``force=True`` to switch off any
    previous, conflicting settings.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        # already active: keep the existing cache as it is
        if ParserElement._packratEnabled:
            return

        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        ParserElement._parse = ParserElement._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, **kwargs
) -> ParseResults:
    """
    Parse ``instring`` from its beginning using this parser definition. This is
    the main entry point intended for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the whole input matches the grammar, set ``parse_all`` to ``True``; this
    has the same effect as ending the grammar with :class:`StringEnd`\\ ().

    So that column numbers are reported properly, ``parse_string`` works on a copy of the input in which
    every tab has been expanded to spaces (8 spaces per tab, the ``string.expandtabs`` default). When the
    input contains tabs and a parse action uses its ``loc`` argument to index into the string being parsed,
    a consistent view of the input can be had in any of these ways:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    must_consume_all = parse_all or legacy_parse_all

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if must_consume_all:
            # skip trailing whitespace/ignorables, then insist on end of input
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)

    return tokens
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    **kwargs,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Search the input string for matches of this expression, yielding for each
    one the matching tokens, the start location, and the end location. The
    optional ``max_matches`` argument stops the scan once 'n' matches have been
    found. If ``overlap`` is specified, then overlapping matches will be reported.

    The start and end locations are relative to the string being parsed. See
    :class:`parse_string` for more information on parsing strings with
    embedded tabs.

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    legacy_max: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    max_matches = min(legacy_max, max_matches)

    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)

    if always_skip_whitespace:
        # a throwaway Empty carrying this expression's ignorables and
        # whitespace characters, used only for its preParse
        skipper = Empty()
        skipper.ignoreExprs = self.ignoreExprs
        skipper.whiteChars = self.whiteChars
        preparseFn = skipper.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse

    ParserElement.reset_cache()
    loc = 0
    matches = 0
    try:
        while loc <= instrlen and matches < max_matches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character further on
                loc = preloc + 1
                continue

            if nextLoc <= loc:
                # the match did not advance past loc; step forward instead
                loc = preloc + 1
                continue

            matches += 1
            if debug:
                print(
                    {
                        "tokens": tokens.as_list(),
                        "start": preloc,
                        "end": nextLoc,
                    }
                )
            yield tokens, preloc, nextLoc

            if not overlap:
                loc = nextLoc
            elif preparseFn(instring, loc) > loc:
                loc = nextLoc
            else:
                loc += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text using the
    tokens returned from a parse action. To use ``transform_string``, define a
    grammar and attach to it a parse action that modifies the returned token
    list. ``transform_string()`` then scans the target string for matches,
    substitutes each matched span according to the parse action's logic, and
    returns the resulting transformed string.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
        made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
        Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # carry over the unmatched text between the previous match and this one
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces.extend(toks.as_list())
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[prev_end:])
        kept = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(kept))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, collecting just the tokens of
    every match of this parse expression. The optional ``max_matches``
    argument stops the search once 'n' matches have been found.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    legacy_max: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    max_matches = min(legacy_max, max_matches)

    try:
        scanner = self.scan_string(
            instring,
            max_matches=max_matches,
            always_skip_whitespace=False,
            debug=debug,
        )
        # keep only the tokens of each (tokens, start, end) triple
        found = [tokens for tokens, _start, _end in scanner]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Generator method that splits a string, treating matches of this expression
    as the separators. The optional ``maxsplit`` argument limits the number of
    splits; the optional ``include_separators`` argument (default= ``False``)
    controls whether the separating matched text is also yielded.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    legacy_flag: bool = deprecate_argument(kwargs, "includeSeparators", False)
    emit_separators = legacy_flag or include_separators

    cursor = 0
    for tokens, start, end in self.scan_string(instring, max_matches=maxsplit):
        # text between the previous separator and this one
        yield instring[cursor:start]
        if emit_separators:
            yield tokens[0]
        cursor = end
    # whatever follows the last separator
    yield instring[cursor:]
def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. A string added to a
    :class:`ParserElement` is converted to a :class:`Literal`\\ by default.

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    operand = (
        self._literalStringClass(other) if isinstance(other, str_type) else other
    )
    if isinstance(operand, ParserElement):
        return And([self, operand])
    return NotImplemented
def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` yields a :class:`SkipTo` for ``expr`` (named ``"_skipped*"``)
    followed by ``expr``; a string left operand is converted to a literal first.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left + self
    return NotImplemented
def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string right operand is converted to a literal first.
    """
    right = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(right, ParserElement):
        return self + And._ErrorStop() + right
    return NotImplemented
def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted to a literal first.
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left - self
    return NotImplemented
1681 def __mul__(self, other) -> ParserElement:
1682 """
1683 Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
1684 ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
1685 tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
1686 may also include ``None`` as in:
1688 - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
1689 to ``expr*n + ZeroOrMore(expr)``
1690 (read as "at least n instances of ``expr``")
1691 - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
1692 (read as "0 to n instances of ``expr``")
1693 - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
1694 - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``
1696 Note that ``expr*(None, n)`` does not raise an exception if
1697 more than n exprs exist in the input stream; that is,
1698 ``expr*(None, n)`` does not enforce a maximum number of expr
1699 occurrences. If this behavior is desired, then write
1700 ``expr*(None, n) + ~expr``
1701 """
1702 if other is Ellipsis:
1703 other = (0, None)
1704 elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
1705 other = ((0,) + other[1:] + (None,))[:2]
1707 if not isinstance(other, (int, tuple)):
1708 return NotImplemented
1710 if isinstance(other, int):
1711 minElements, optElements = other, 0
1712 else:
1713 other = tuple(o if o is not Ellipsis else None for o in other)
1714 other = (other + (None, None))[:2]
1715 if other[0] is None:
1716 other = (0, other[1])
1717 if isinstance(other[0], int) and other[1] is None:
1718 if other[0] == 0:
1719 return ZeroOrMore(self)
1720 if other[0] == 1:
1721 return OneOrMore(self)
1722 else:
1723 return self * other[0] + ZeroOrMore(self)
1724 elif isinstance(other[0], int) and isinstance(other[1], int):
1725 minElements, optElements = other
1726 optElements -= minElements
1727 else:
1728 return NotImplemented
1730 if minElements < 0:
1731 raise ValueError("cannot multiply ParserElement by negative value")
1732 if optElements < 0:
1733 raise ValueError(
1734 "second tuple value must be greater or equal to first tuple value"
1735 )
1736 if minElements == optElements == 0:
1737 return And([])
1739 if optElements:
1741 def makeOptionalList(n):
1742 if n > 1:
1743 return Opt(self + makeOptionalList(n - 1))
1744 else:
1745 return Opt(self)
1747 if minElements:
1748 if minElements == 1:
1749 ret = self + makeOptionalList(optElements)
1750 else:
1751 ret = And([self] * minElements) + makeOptionalList(optElements)
1752 else:
1753 ret = makeOptionalList(optElements)
1754 else:
1755 if minElements == 1:
1756 ret = self
1757 else:
1758 ret = And([self] * minElements)
1759 return ret
1761 def __rmul__(self, other) -> ParserElement:
1762 return self.__mul__(other)
def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    alternative = other
    if isinstance(alternative, str_type):
        if alternative == "":
            # `expr | ""` is equivalent to `Opt(expr)`
            return Opt(self)
        alternative = self._literalStringClass(alternative)

    if isinstance(alternative, ParserElement):
        return MatchFirst([self, alternative])
    return NotImplemented
def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted to a literal first.
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left | self
    return NotImplemented
def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`.

    A string right operand is converted to a literal first.
    """
    right = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(right, ParserElement):
        return Or([self, right])
    return NotImplemented
def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted to a literal first.
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left ^ self
    return NotImplemented
def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`.

    A string right operand is converted to a literal first.
    """
    right = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(right, ParserElement):
        return Each([self, right])
    return NotImplemented
def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted to a literal first.
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left & self
    return NotImplemented
def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny`
    wrapping this expression.
    """
    negated = NotAny(self)
    return negated
# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence
# (this class defines __getitem__ for repetition syntax; setting __iter__ to None
# makes iter(expr) raise TypeError rather than fall back to indexing 0, 1, 2, ...)
__iter__ = None
def __getitem__(self, key):
    """
    ``[]`` indexing notation is a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream. If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    .. versionchanged:: 3.1.0
       Support for slice notation.
    """

    stop_on = NoMatch()
    has_stop_on = False
    if isinstance(key, slice):
        # expr[count : end_expr]
        key, stop_on = key.start, key.stop
        if key is None:
            key = ...
        has_stop_on = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max : end_expr]
        trailing = key[1]
        key, stop_on = (key[0], trailing.start), trailing.stop
        has_stop_on = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    n_args = len(key)
    if n_args > 2:
        overflow = f"... [{n_args}]" if n_args > 5 else ""
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{overflow})"
        )

    # clip to 2 elements
    repeated = self * tuple(key[:2])
    repeated = typing.cast(_MultipleMatch, repeated)

    if has_stop_on:
        repeated.stopOn(stop_on)

    return repeated
1905 def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
1906 """
1907 Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.
1909 If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
1910 passed as ``True``.
1912 If ``name`` is omitted, same as calling :class:`copy`.
1914 Example:
1916 .. testcode::
1918 # these are equivalent
1919 userdata = (
1920 Word(alphas).set_results_name("name")
1921 + Word(nums + "-").set_results_name("socsecno")
1922 )
1924 userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
1925 """
1926 if name is not None:
1927 return self._setResultsName(name)
1929 return self.copy()
def suppress(self) -> ParserElement:
    """
    Wrap this :class:`ParserElement` in a :class:`Suppress`, so that its output is
    suppressed; useful for keeping punctuation out of the returned results.
    """
    silenced = Suppress(self)
    return silenced
def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any)
    """
    # ``recursive`` is accepted but not read by this implementation
    setattr(self, "skipWhitespace", True)
    return self
def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern. This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any)
    """
    # ``recursive`` is accepted but not read by this implementation
    setattr(self, "skipWhitespace", False)
    return self
def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Replace the default whitespace chars for this element with ``chars``
    and enable whitespace skipping.

    :param chars: the characters to treat as whitespace (stored as a set)
    :param copy_defaults: value stored in ``copyDefaultWhiteChars``
    :returns: ``self``, to allow call chaining
    """
    self.skipWhitespace = True
    whitespace = set(chars)
    self.whiteChars, self.copyDefaultWhiteChars = whitespace, copy_defaults
    return self
def parse_with_tabs(self) -> ParserElement:
    """
    Override the default behavior of expanding ``<TAB>`` characters to
    spaces before parsing the input string, by setting ``keepTabs``.
    Must be called before ``parse_string`` when the input grammar contains
    elements that match ``<TAB>`` characters.

    :returns: this expression, to allow call chaining
    """
    keep_tabs = True
    self.keepTabs = keep_tabs
    return self
def ignore(self, other: ParserElement) -> ParserElement:
    """
    Register an expression (e.g., a comment pattern) that should be
    skipped over while matching. May be called more than once to register
    several ignorable patterns.

    A string argument is wrapped in :class:`Suppress`. A :class:`Suppress`
    is added only if it is not already registered; any other expression is
    copied and wrapped in :class:`Suppress` before being added.

    Example:

    .. doctest::

        >>> patt = Word(alphas)[...]
        >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
        ['ablaj']

        >>> patt = Word(alphas)[...].ignore(c_style_comment)
        >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
        ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        # arbitrary expression: ignore a suppressed copy of it
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        # already a Suppress: register it once
        self.ignoreExprs.append(other)
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Install the callbacks used to display debugging messages while doing
    pattern matching, and enable debugging on this expression. A falsy
    value for any action selects the corresponding default debug action.

    :param start_action: method to be called when an expression is about to be parsed;
       should have the signature::

           fn(input_string: str,
              location: int,
              expression: ParserElement,
              cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
       should have the signature::

           fn(input_string: str,
              start_location: int,
              end_location: int,
              expression: ParserElement,
              parsed_tokens: ParseResults,
              cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
       should have the signature::

           fn(input_string: str,
              location: int,
              expression: ParserElement,
              exception: Exception,
              cache_hit: bool)

    :returns: this expression, to allow call chaining
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = True
    return self
def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Turn the display of debugging messages during pattern matching on or off.
    Pass ``flag=True`` to enable, ``flag=False`` to disable.
    Pass ``recurse=True`` to apply the flag to this expression and all sub-expressions.

    Example:

    .. testcode::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE

        Match alphaword at loc 0(1,1)
          abc 123 xyz 890
          ^
        Matched alphaword -> ['abc']
        Match alphaword at loc 4(1,5)
          abc 123 xyz 890
              ^
        Match alphaword failed, ParseException raised: Expected alphaword, ...
        Match alphaword at loc 8(1,9)
          abc 123 xyz 890
                  ^
        Matched alphaword -> ['xyz']
        Match alphaword at loc 12(1,13)
          abc 123 xyz 890
                      ^
        Match alphaword failed, ParseException raised: Expected alphaword, ...
          abc 123 xyz 890
                         ^
        Match alphaword failed, ParseException raised: Expected alphaword, found end of text ...

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :meth:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :meth:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # apply the flag individually to every expression reported by visit_all()
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling: (re)install the default debug actions, which also sets self.debug
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self
@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression. Computed on first access by
    ``_generateDefaultName()`` and cached in ``_defaultName`` until that
    attribute is reset to ``None``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Build the auto-generated name returned by ``default_name``.
    Every child class must provide its own implementation.
    """
def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a name to this expression, to make debugging and exception
    messages clearer. If `__diag__.enable_debug_on_named_expressions` is
    set to True, setting a name will also enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example:

    .. doctest::

        >>> integer = Word(nums)
        >>> integer.parse_string("ABC")
        Traceback (most recent call last):
        ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

        >>> integer.set_name("integer")
        integer
        >>> integer.parse_string("ABC")
        Traceback (most recent call last):
        ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # rebuild the error message from the (possibly new) display name
    display_name = str(self)
    self.errmsg = "Expected " + display_name

    if __diag__.enable_debug_on_named_expressions:
        has_name = name is not None
        self.set_debug(has_name)

    return self
@property
def name(self) -> str:
    """
    The user-defined name when one has been set, otherwise the
    auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is not None:
        return custom
    return self.default_name

@name.setter
def name(self, new_name) -> None:
    # assignment is routed through set_name()
    self.set_name(new_name)
def __str__(self) -> str:
    """Return the display name of this expression (see ``name``)."""
    display_name = self.name
    return display_name
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text
def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default
    name so that it is regenerated on next access.

    :returns: this expression, to allow call chaining
    """
    self.streamlined = True
    # reset the cache read by default_name
    self._defaultName = None
    return self
def recurse(self) -> list[ParserElement]:
    """Return the sub-expressions of this element; this implementation has none."""
    no_children: list[ParserElement] = []
    return no_children
def _checkRecursion(self, parseElementList):
    # Extend a copy of the path walked so far with this element, then run
    # the same check on every sub-expression returned by recurse().
    path_with_self = parseElementList + [self]
    for sub_expr in self.recurse():
        sub_expr._checkRecursion(path_with_self)
def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.
    Emits a ``DeprecationWarning`` on every call; ``validateTrace`` is not used.
    """
    deprecation_message = (
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    )
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    :param file_or_filename: an object with a ``read()`` method, or a
       filename; a filename is opened, read in full, and closed before
       parsing begins
    :param encoding: encoding used when opening a filename
    :param parse_all: flag passed to :meth:`parse_string`; the legacy
       ``parseAll`` keyword is also accepted, and either one being true
       enables it
    :raises ParseBaseException: on parse failure; unless
       ``ParserElement.verbose_stacktrace`` is set, the exception is
       re-raised with its traceback cleared
    """
    parseAll: bool = deprecate_argument(kwargs, "parseAll", False)

    parse_all = parse_all or parseAll
    try:
        # first assume a file-like object
        file_contents = typing.cast(TextIO, file_or_filename).read()
    except AttributeError:
        # no usable read(): treat the argument as a filename
        filename = typing.cast(str, file_or_filename)
        with open(filename, "r", encoding=encoding) as f:
            file_contents = f.read()
    try:
        return self.parse_string(file_contents, parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object compares equal
    - a string compares equal when this expression matches all of it
    - another :class:`ParserElement` compares equal when both have equal
      instance attributes
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity
def matches(self, test_string: str, parse_all: bool = True, **kwargs) -> bool:
    """
    Quick check of whether this parser accepts a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match;
       converted with ``str()`` before parsing
    :param parse_all: flag to pass to :meth:`parse_string` when running tests;
       the legacy ``parseAll`` keyword is also accepted, and either one
       being false disables it
    :returns: ``True`` if parsing succeeds, ``False`` if a
       ``ParseBaseException`` is raised

    Example:

    .. doctest::

        >>> expr = Word(nums)
        >>> expr.matches("100")
        True
    """
    parseAll: bool = deprecate_argument(kwargs, "parseAll", True)

    parse_all = parse_all and parseAll
    try:
        self.parse_string(str(test_string), parse_all=parse_all)
    except ParseBaseException:
        return False
    return True
def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The keyword-only ``parseAll``, ``fullDump``, ``printResults``, ``failureTests`` and
    ``postParse`` arguments are legacy spellings; each is merged with its snake_case
    counterpart before use.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Passing example:

    .. testcode::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            # negative decimal number without leading digit
            -.100
            ''')
        print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        # negative decimal number without leading digit
        -.100
        [-0.1]
        Success

    Failure-test example:

    .. testcode::

        result = number_expr.run_tests('''
            # stray character
            100Z
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # stray character
        100Z
        100Z
           ^
        ParseException: Expected end of text, found 'Z' ...

        # too many '.'
        3.14.159
        3.14.159
            ^
        ParseException: Expected end of text, found '.' ...
        FAIL: Expected end of text, found '.' ...
        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

        expr = Word(alphanums)[1,...]
        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE
        :hide:


        this is a test\\n of strings that spans \\n 3 lines
        ['this', 'is', 'a', 'test', 'of', 'strings', 'that', 'spans', '3', 'lines']

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each legacy camelCase flag with its snake_case counterpart
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a single multiline string: one stripped test per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
    comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # expression that turns a literal backslash-n marker into a real newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines (and blank lines following them) are collected and
        # shown ahead of the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            # use the merged flag so that a legacy parseAll=False is honored too
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support modules cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )
    # Render once, before touching the output target, so that a rendering
    # failure cannot leave a freshly truncated (empty) output file behind.
    html = railroad_to_html(railroad, embed=embed, **kwargs)

    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(html)
        return

    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(html)
# Compatibility synonyms
# Each legacy camelCase attribute below is bound to the result of
# replaced_by_pep8(<legacy name>, <snake_case callable>); the first group is
# additionally wrapped in staticmethod(), and defaultName is a plain alias of
# the default_name property.
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
defaultName = default_name
# fmt: on
class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        # expression that the pending '...' was attached to
        self.anchor = expr
        # selects the two-alternative form built in __add__
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # name of (anchor + Empty()), with the "Empty" text shown as "..."
        return str(self.anchor + Empty()).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # SkipTo(other), displayed as "..." and reporting under the
        # "_skipped*" results name
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if self.must_skip:

            def must_skip(t):
                # nothing, or only an empty string, was skipped: remove the
                # first token and the "_skipped" entry
                if not t._skipped or t._skipped.as_list() == [""]:
                    del t[0]
                    t.pop("_skipped", None)

            def show_skip(t):
                # last skipped item is empty: replace the "_skipped" entry
                # with a message naming the anchor expression
                if t._skipped.as_list()[-1:] == [""]:
                    t.pop("_skipped")
                    t["_skipped"] = f"missing <{self.anchor!r}>"

            # first alternative: anchor followed by a skip (must_skip action);
            # second alternative: a skip alone (show_skip action)
            return (
                self.anchor + skipper().add_parse_action(must_skip)
                | skipper().add_parse_action(show_skip)
            ) + other

        return self.anchor + skipper + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # always raises: a placeholder that was never combined with a
        # following expression cannot be parsed
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # default display name is simply the concrete class name
        concrete_class = type(self)
        return concrete_class.__name__
class NoMatch(Token):
    """
    A token that never matches: every parse attempt raises
    :class:`ParseException`.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example:

    .. doctest::

        >>> Literal('abc').parse_string('abc')
        ParseResults(['abc'], {})
        >>> Literal('abc').parse_string('abcdef')
        ParseResults(['abc'], {})
        >>> Literal('abc').parse_string('ab')
        Traceback (most recent call last):
        ParseException: Expected 'abc', found 'ab' (at char 0), (line: 1, col: 1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", **kwargs):
        # Subclasses are always created as themselves
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: for a plain Literal, pick a subclass with an
        # optimized parseImpl based on the length of the match string
        matchString: str = deprecate_argument(kwargs, "matchString", "")

        match_string = matchString or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", **kwargs) -> None:
        matchString: str = deprecate_argument(kwargs, "matchString", "")

        super().__init__()
        literal_text = matchString or match_string
        self.match = literal_text
        self.matchLen = len(literal_text)
        self.firstMatchChar = literal_text[:1]
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # cheap first-character comparison before the full startswith() test
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class Empty(Literal):
    """
    An empty token; it always matches, consuming no input and
    producing no tokens.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # both arguments are accepted but ignored; the literal text is always ""
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        no_tokens = []
        return loc, no_tokens
class _SingleCharLiteral(Literal):
    # Literal specialization selected by Literal.__new__ for one-character
    # match strings: a single character comparison replaces startswith()
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Register Literal on ParserElement as its _literalStringClass; done here
# because Literal is only defined at this point in the module.
# NOTE(review): presumably the class used to wrap bare strings in
# expressions - confirm against ParserElement.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> Keyword("start").parse_string("start")
        ParseResults(['start'], {})
        >>> Keyword("start").parse_string("starting")
        Traceback (most recent call last):
        ParseException: Expected Keyword 'start', keyword was immediately
        followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> Keyword("start").parse_string("starting").debug()
        Traceback (most recent call last):
        ParseException: Expected Keyword "start", keyword was immediately
        followed by keyword character, found 'ing' ...

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        **kwargs,
    ) -> None:
        # legacy camelCase spellings of the arguments
        matchString = deprecate_argument(kwargs, "matchString", "")
        identChars = deprecate_argument(kwargs, "identChars", None)

        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done in upper case
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.ident_chars = set(identChars)

    @property
    def identChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use ident_chars instead.

        Property returning the characters being used as keyword characters for this expression.
        """
        return self.ident_chars

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword text at ``loc``, requiring that it is neither
        immediately preceded nor immediately followed by a keyword character.

        Returns ``(loc + matchLen, match)`` on success. Raises
        :class:`ParseException` otherwise; when the text matched but a
        neighboring character is a keyword character, the message says so
        and the error location points at that side of the match.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        # read the set directly instead of going through the deprecated
        # identChars property
        ident_chars = self.ident_chars
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in ident_chars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in ident_chars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in ident_chars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in ident_chars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )
class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example:

    .. doctest::

        >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", **kwargs) -> None:
        matchString: str = deprecate_argument(kwargs, "matchString", "")

        match_string = matchString or match_string
        # the base class stores the upper-cased text used for comparison
        super().__init__(match_string.upper())
        # Preserve the defining literal.
        self.returnString = match_string
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        candidate = instring[loc : loc + self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString
class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example:

    .. doctest::

        >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self, match_string: str = "", ident_chars: typing.Optional[str] = None, **kwargs
    ) -> None:
        # pull the pre-PEP8 keyword arguments out of kwargs first
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        legacy_ident: typing.Optional[str] = deprecate_argument(
            kwargs, "identChars", None
        )

        # a truthy legacy value takes precedence over its PEP8-named twin
        super().__init__(
            legacy_match or match_string,
            legacy_ident or ident_chars,
            caseless=True,
        )
class CloseMatch(Token):
    """Approximate version of :class:`Literal`: accepts input that differs
    from the defining string in at most 'n' character positions.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    A successful parse returns the text matched in the input string,
    along with these named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    An empty ``mismatches`` list means the input matched exactly.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> patt = CloseMatch("ATCATCGAATGGA")
        >>> patt.parse_string("ATCATCGAAXGGA")
        ParseResults(['ATCATCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

        >>> patt.parse_string("ATCAXCGAAXGGA")
        Traceback (most recent call last):
        ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
        found 'ATCAXCGAAXGGA' (at char 0), (line:1, col:1)

        # exact match
        >>> patt.parse_string("ATCATCGAATGGA")
        ParseResults(['ATCATCGAATGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': []})

        # close match allowing up to 2 mismatches
        >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        >>> patt.parse_string("ATCAXCGAAXGGA")
        ParseResults(['ATCAXCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        caseless=False,
        **kwargs,
    ) -> None:
        # the legacy keyword also supplies the default limit of 1
        legacy_limit: int = deprecate_argument(kwargs, "maxMismatches", 1)

        super().__init__()
        self.match_string = match_string
        if max_mismatches is None:
            self.maxMismatches = legacy_limit
        else:
            self.maxMismatches = max_mismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        start = loc
        target = self.match_string
        end = start + len(target)

        # only attempt a comparison when enough input remains
        if end <= len(instring):
            limit = self.maxMismatches
            ignore_case = self.caseless
            mismatches = []
            too_many = False
            last_index = 0

            for last_index, (src, mat) in enumerate(zip(instring[start:end], target)):
                if ignore_case:
                    src, mat = src.lower(), mat.lower()

                if src == mat:
                    continue

                mismatches.append(last_index)
                if len(mismatches) > limit:
                    too_many = True
                    break

            if not too_many:
                # advance past the last compared character
                loc = start + last_index + 1
                results = ParseResults([instring[start:loc]])
                results["original"] = target
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

        # a word composed of digits
        integer = Word(nums)
        # Two equivalent alternate forms:
        Word("0123456789")
        Word(srange("[0-9]"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral
        # (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
        Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        # pre-PEP8 keyword arguments; a truthy legacy value takes precedence
        initChars: typing.Optional[str] = deprecate_argument(kwargs, "initChars", None)
        bodyChars: typing.Optional[str] = deprecate_argument(kwargs, "bodyChars", None)
        asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        excludeChars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.init_chars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set: body characters are the initial characters
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # Use the stored sets directly; the ``initChars`` property would build
        # a new set copy on every access.
        lead_chars = self.init_chars
        body_set = self.bodyChars

        # see if we can make a regex for this Word
        if " " not in (lead_chars | body_set):
            if len(lead_chars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(lead_chars)}]"

            if body_set == lead_chars:
                # single character class, repeated
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # leading class followed by a (differently) repeated body class;
                # the body counts are one less since the leading char is matched
                # separately
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(body_set)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    @property
    def initChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use `init_chars` instead.

        Property returning the initial chars to be used when matching this
        Word expression. If no body chars were specified, the initial characters
        will also be the body characters.
        """
        return set(self.init_chars)

    def copy(self) -> Word:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            # re-bind the regex fast path on the copy
            ret.re_match = self.re_match
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        def charsAsStr(s):
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        # read the stored set once rather than copying it via the property
        lead_chars = self.init_chars
        if lead_chars != self.bodyChars:
            base = f"W:({charsAsStr(lead_chars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(lead_chars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # membership test against the stored set; avoids the per-call set copy
        # made by the ``initChars`` property
        if instring[loc] not in self.init_chars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # word continues past the specified maximum length
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            # keyword must not be adjacent to other body characters
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()
class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)``, for
    matching exactly one character taken from a string of candidate
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        # pre-PEP8 keyword arguments; a truthy legacy value takes precedence
        legacy_keyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        legacy_exclude: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        super().__init__(
            charset,
            exact=1,
            as_keyword=legacy_keyword or as_keyword,
            exclude_chars=legacy_exclude or exclude_chars,
        )
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ module for an
    explanation of the acceptable patterns and flags.

    ``pattern`` may also be a callable taking no arguments and returning a
    string or a compiled RE object; it is not called until the compiled
    expression is first needed.

    Example:

    .. testcode::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept regular expressions compiled using the
        # re module
        import re
        parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        **kwargs,
    ) -> None:
        super().__init__()
        # pre-PEP8 keyword arguments; a truthy legacy value takes precedence
        asGroupList: bool = deprecate_argument(kwargs, "asGroupList", False)
        asMatch: bool = deprecate_argument(kwargs, "asMatch", False)

        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compiled lazily by the ``re`` property
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # already-compiled RE object (duck-typed, so other RE modules work)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # when both flags are set, asMatch is applied last and therefore wins
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self) -> Regex:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Regex = cast(Regex, super().copy())
        # re-bind the alternate parseImpl on the copy, in the same order as
        # __init__
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch  # type: ignore [method-assign]
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """
        Property returning the compiled regular expression for this Regex.

        Compiles the pattern on first access (calling the pattern callable
        first, if one was given) and records whether the expression can match
        the empty string.

        Generally only used internally by pyparsing.

        :raises ValueError: if the pattern string cannot be compiled.
        """
        freshly_compiled = False

        if not self._re:
            if callable(self.pattern):
                # replace self.pattern with the string returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, we're done
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
            else:
                try:
                    self._re = re.compile(self.pattern, self.flags)
                except re.error as err:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from err
                freshly_compiled = True

        # A pattern compiled here always refreshes the flag.  For a
        # pre-compiled RE (passed in, or returned by a callable) the flag is
        # filled in only if it has not been set, so that mayReturnEmpty
        # reports a bool instead of None.
        if freshly_compiled or self._may_return_empty is None:
            self._may_return_empty = self._re.match("") is not None

        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        return self.re.match

    @property
    def mayReturnEmpty(self):
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        # collapse the doubled backslashes that repr() introduces
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        # expose named groups as named results
        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        # variant of parseImpl that returns the tuple of regex groups
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        # variant of parseImpl that returns the match object itself
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        :raises TypeError: if this expression was built with
            ``as_group_list=True``, or with ``as_match=True`` and ``repl``
            is a callable.

        Example:

        .. testcode::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))

        .. testoutput::

            <h1>main title</h1>
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            # tokens[0] is the match object, so expand the template on it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # tokens[0] is the matched text; re-run the substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to re_escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to re_escape an embedded quote
      string (such as SQL's ``""`` to re_escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
        ``unquote_results`` is ``False``.

    :raises ValueError: if ``quote_char`` or ``end_quote_char`` is empty
        after stripping whitespace.

    Example:

    .. doctest::

        >>> qs = QuotedString('"')
        >>> print(qs.search_string('lsjdf "This is the quote" sldjf'))
        [['This is the quote']]
        >>> complex_qs = QuotedString('{{', end_quote_char='}}')
        >>> print(complex_qs.search_string(
        ...     'lsjdf {{This is the "quote"}} sldjf'))
        [['This is the "quote"']]
        >>> sql_qs = QuotedString('"', esc_quote='""')
        >>> print(sql_qs.search_string(
        ...     'lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
        [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace sequence -> the whitespace character it stands for
    ws_map = {r"\t": "\t", r"\n": "\n", r"\f": "\f", r"\r": "\r"}

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        **kwargs,
    ) -> None:
        super().__init__()
        # pre-PEP8 keyword arguments
        quoteChar: str = deprecate_argument(kwargs, "quoteChar", "")
        escChar: str = deprecate_argument(kwargs, "escChar", None)
        escQuote: str = deprecate_argument(kwargs, "escQuote", None)
        unquoteResults: bool = deprecate_argument(kwargs, "unquoteResults", True)
        endQuoteChar: typing.Optional[str] = deprecate_argument(
            kwargs, "endQuoteChar", None
        )
        convertWhitespaceEscapes: bool = deprecate_argument(
            kwargs, "convertWhitespaceEscapes", True
        )

        # string-valued options: a truthy legacy value wins;
        # boolean options default to True, so either spelling can turn them off
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of a multi-character end quote, as long
            # as it is not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally exclude CR and LF
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # This fragment must NOT be an f-string: in an f-string the
                    # repetition counts {3}, {2} and {4} would be evaluated as
                    # replacement fields and turn into the literal characters
                    # "3", "2" and "4".
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    @staticmethod
    def _convert_escaped_numerics(s: str) -> str:
        # Convert the text following the backslash of a numeric escape
        # ("0", three octal digits, "xHH" or "uHHHH") to its character;
        # anything else is returned unchanged.
        if s == "0":
            return "\0"
        if s.isdigit() and len(s) == 3:
            return chr(int(s, base=8))
        if s.startswith(("u", "x")):
            return chr(int(s[1:], base=16))
        return s

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            self.re_match(instring, loc)
            if instring[loc] == self.first_quote_char
            else None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                convert_escaped_numerics = self._convert_escaped_numerics
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret
class CharsNotIn(Token):
    """Token for matching runs of characters that are *not* members of a
    given set. Whitespace is part of the match unless it is listed in the
    exclusion set - see example. Construct with a string holding every
    disallowed character, plus optional minimum, maximum, and/or exact
    lengths. ``min`` defaults to 1 (a minimum value < 1 is not valid);
    ``max`` and ``exact`` default to 0, meaning no maximum or exact
    length restriction.

    Example:

    .. testcode::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(
            DelimitedList(csv_value).parse_string(
                "dkls,lsdkjf,s12 34,@!#,213"
            )
        )

    prints:

    .. testoutput::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self, not_chars: str = "", min: int = 1, max: int = 0, exact: int = 0, **kwargs
    ) -> None:
        super().__init__()
        legacy_not_chars: str = deprecate_argument(kwargs, "notChars", "")

        # whitespace is significant for this token, so never skip it
        self.skipWhitespace = False
        self.notChars = not_chars or legacy_not_chars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        # a max of 0 (or less) means "no upper bound"
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        collapsed = _collapse_string_to_ranges(self.notChars)
        # the length test uses the collapsed form, the display uses the raw one
        if len(collapsed) <= 16:
            shown = self.notChars
        else:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        # first position after start holding an excluded character, or the
        # scan limit when there is none
        loc = next(
            (pos for pos in range(start + 1, stop) if instring[pos] in excluded),
            stop,
        )

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when building the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00a0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180e": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200a": "<HAIR_SPACE>",
        "\u200b": "<ZERO_WIDTH_SPACE>",
        "\u202f": "<NNBSP>",
        "\u205f": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        """Match a run of the characters in ``ws``.

        A positive ``exact`` overrides both ``min`` and ``max``; a
        non-positive ``max`` means no upper limit (stored as ``_MAX_INT``).
        """
        super().__init__()
        self.matchWhite = ws
        # skip only those known whitespace characters that this expression
        # is not itself trying to match
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Return the concatenated display names of the matched characters.

        Characters with no entry in ``whiteStrs`` (for example ``"\\v"``) are
        rendered as ``<U+XXXX>`` instead of raising ``KeyError``, which would
        otherwise abort ``__init__`` when the name is first computed.
        """
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume between ``minLen`` and ``maxLen`` matching characters.

        Returns ``(end_loc, matched_text)``; raises ``ParseException`` when
        the character at ``loc`` does not match or too few were consumed.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class PositionToken(Token):
    """Base class for tokens that test the parse position; instances are
    flagged as possibly matching empty and as not raising ``IndexError``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorables and whitespace until the target column is reached
        or a non-space character (or the end of input) stops the advance.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        end = len(instring)
        while loc < end:
            if not instring[loc].isspace() or col(loc, instring) == target:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column; raise
        ``ParseException`` if ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        stop = loc + self.col - current
        return stop, instring[loc:stop]
class LineStart(PositionToken):
    r"""Matches if current position is at the logical beginning of a line (after skipping whitespace)
    within the parse string

    Example:

    .. testcode::

        test = '''\
        AAA this line
        AAA and this line
          AAA and even this line
          B AAA but definitely not this line
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['AAA', ' this line']
        ['AAA', ' and this line']
        ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips everything in whiteChars (now without "\n")
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Advance past skippable whitespace and, when newlines were part of
        the original whitespace set, past any newlines that follow it.
        """
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" not in self.orig_whiteChars:
            return pos

        while instring[pos : pos + 1] == "\n":
            pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens only when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # a newline must stay visible to this expression, so stop skipping it
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a newline at ``loc``, or match with no tokens exactly at
        the end of the input; anything else raises ``ParseException``.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == instrlen:
            return loc + 1, []
        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed at position 0, or at the position that pre-parsing from 0
        reaches (i.e. only whitespace and ignorables precede ``loc``).
        """
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)
class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Fail before the end of the input; at the end return ``loc + 1``,
        and past the end return ``loc`` unchanged, both with no tokens.
        """
        instrlen = len(instring)

        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc + 1, []
        if loc > instrlen:
            return loc, []

        # only reachable if none of the comparisons above held
        raise ParseException(instring, loc, self.errmsg, self)
class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        """Store the set of word characters.

        The legacy ``wordChars`` keyword is read from ``kwargs`` through
        ``deprecate_argument``; it takes effect only when it differs from
        ``printables``, otherwise ``word_chars`` is used.
        """
        wordChars: str = deprecate_argument(kwargs, "wordChars", printables)

        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or where the previous
        character is not a word character and the current one is.
        """
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        """Store the set of word characters and disable whitespace skipping.

        The legacy ``wordChars`` keyword is read from ``kwargs`` through
        ``deprecate_argument``; it takes effect only when it differs from
        ``printables``, otherwise ``word_chars`` is used.
        """
        wordChars: str = deprecate_argument(kwargs, "wordChars", printables)

        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or past the end of the input, or where
        the previous character is a word character and the current one is not.

        Position 0 of a non-empty string is rejected explicitly: there is no
        previous character there, and ``instring[loc - 1]`` would otherwise
        wrap around and test the last character of the input.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                loc == 0
                or instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example:

    .. doctest::

        >>> end_punc = "." | ("!" + Tag("enthusiastic"))
        >>> greeting = "Hello," + Word(alphas) + end_punc

        >>> result = greeting.parse_string("Hello, World.")
        >>> print(result.dump())
        ['Hello,', 'World', '.']

        >>> result = greeting.parse_string("Hello, World!")
        >>> print(result.dump())
        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        # what gets written into the results when this element is reached
        self.tag_name = tag_name
        self.tag_value = value

        self.mayIndexError = False
        self._may_return_empty = True
        self.show_in_diagram = False

        self.leave_whitespace()
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the ``self.exprs`` list.

        A single string or ParserElement becomes a one-element list; strings
        inside an iterable are wrapped with ``_literalStringClass``; anything
        that cannot be turned into a list is stored as a one-element list.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]

        # capture a generator's contents before inspecting them
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            members = list(exprs)
            # any strings in the sequence are wrapped as literal expressions
            self.exprs = [
                self._literalStringClass(m) if isinstance(m, str_type) else m
                for m in members
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """
        Add an expression to the list of expressions related to this ParseExpression instance.
        """
        self.exprs.append(other)
        # the cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        # work on copies of the contained expressions, not the originals
        self.exprs = [e.copy() for e in self.exprs]
        for contained in self.exprs:
            contained.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        # work on copies of the contained expressions, not the originals
        self.exprs = [e.copy() for e in self.exprs]
        for contained in self.exprs:
            contained.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            # pass the newest ignore expression on to each contained expression
            for contained in self.exprs:
                contained.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline contained expressions and flatten same-class nesting.

        Only applied once (guarded by ``self.streamlined``).
        """
        if self.streamlined:
            return self

        super().streamline()

        for contained in self.exprs:
            contained.streamline()

        def mergeable(candidate) -> bool:
            # a nested expression of this same class can be folded in only if
            # it carries no parse actions, results name, or debug flag
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if mergeable(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if mergeable(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self._may_return_empty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then validates the
        contained expressions and checks this expression for recursion.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = (validateTrace if validateTrace is not None else [])[:] + [self]
        for contained in self.exprs:
            contained.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if a contained expression already
        carries a results name of its own.
        """
        warn_flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and warn_flag not in self.suppress_warnings_
        ):
            # only the first offending contained expression is reported
            offender = next(
                (
                    e
                    for e in self.exprs
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and warn_flag not in e.suppress_warnings_
                ),
                None,
            )
            if offender is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {offender.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example:

    .. testcode::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element: once parseImpl reaches one, failures of the
        # expressions that follow are raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence from ``exprs_arg``.

        Strings are wrapped with ``_literalStringClass``; each ``...``
        (Ellipsis) entry is replaced with a ``SkipTo`` of the expression that
        follows it.  Raises ``Exception`` if the sequence ends with ``...``.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            tmp: list[ParserElement] = []
            # pair each element with its successor; the last element is never
            # a "current" element here, so it is left in place below
            for cur_expr, next_expr in zip(exprs, exprs[1:]):
                if cur_expr is Ellipsis:
                    tmp.append(SkipTo(next_expr)("_skipped*"))
                else:
                    tmp.append(cur_expr)

            exprs[:-1] = tmp

        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling of the sequence follows its first
            # expression, unless that expression is a White
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self._may_return_empty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """
        Collapse `And` expressions like `And(And(And(A, B), C), D)`
        to `And(A, B, C, D)`.

        .. doctest::

            >>> expr = Word("A") + Word("B") + Word("C") + Word("D")
            >>> # Using '+' operator creates nested And expression
            >>> expr
            {{{W:(A) W:(B)} W:(C)} W:(D)}
            >>> # streamline simplifies to a single And with multiple expressions
            >>> expr.streamline()
            {W:(A) W:(B) W:(C) W:(D)}

        Guards against collapsing out expressions that have special features,
        such as results names or parse actions.

        Resolves pending Skip commands defined using `...` terms.
        """
        # collapse any _PendingSkip's
        if self.exprs and any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in self.exprs[:-1]
        ):
            # sentinel that replaces an expression once it has been merged
            # into the pending skip of the expression before it
            deleted_expr_marker = NoMatch()
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    # combine the pending skip with the expression that follows
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # when prev matches, record its column on the block as
                    # parent_anchor; cur is bound through the default argument
                    # so each lambda keeps its own block
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating tokens.

        After an ``_ErrorStop`` marker has been passed, a
        ``ParseBaseException`` or ``IndexError`` from a later expression is
        re-raised as ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # if isinstance(e, And._ErrorStop):
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support ``expr += other`` by appending to this ``And`` in place;
        strings are wrapped with ``_literalStringClass``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        """Run the recursion check on the contained expressions, stopping
        after the first one that cannot match empty.
        """
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the stepped slice picks just the first and last characters)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example:

    .. testcode::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints:

    .. testoutput::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Build the alternation; it may match empty if any alternative may,
        and skips whitespace only if every alternative does.
        """
        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self._may_return_empty = True

    def streamline(self) -> ParserElement:
        """Streamline via the base class, then recompute the empty-match,
        save-as-list and whitespace-skipping flags from the alternatives.
        """
        super().streamline()
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            # a White alternative turns off whitespace skipping for the whole Or
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        A first pass uses ``try_parse`` to find how far each alternative
        reaches; the matches are then re-parsed from longest to shortest.
        If nothing matches, the furthest-reaching fatal exception is raised
        if there was one, otherwise the furthest-reaching ``ParseException``.
        """
        maxExcLoc = -1
        maxException = None
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # once a fatal error is recorded, discard any saved
                # non-fatal exception
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # non-fatal failures are tracked only while no fatal has occurred
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # best (end location, tokens) seen so far among re-parsed
            # alternatives that fell short of their first-pass length
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie at the top, break it by
                # the longer string form of the failing element
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if maxExcLoc == parse_start_loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Support ``expr ^= other`` by appending to this ``Or`` in place;
        strings are wrapped with ``_literalStringClass``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

        # watch the order of expressions to match
        >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        >>> print(number.search_string("123 3.1416 789"))  # Fail!
        [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        >>> print(number.search_string("123 3.1416 789"))  # Better
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Build the alternation; it may match empty if any alternative may,
        and skips whitespace only if every alternative does.
        """
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return

        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the save-as-list, empty-match and
        whitespace-skipping flags from the alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()

        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        # a White alternative turns off whitespace skipping for the whole group
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` is re-raised immediately.  If every
        alternative fails, the exception that reached furthest into the
        input is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, do_actions)
            except ParseFatalException as fatal:
                fatal.__traceback__ = None
                fatal.parser_element = alternative
                raise
            except ParseException as failure:
                if failure.loc > furthest_loc:
                    furthest_exc = failure
                    furthest_loc = failure.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alternative.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Support ``expr |= other`` by appending to this ``MatchFirst`` in
        place; strings are wrapped with ``_literalStringClass``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # MatchFirst([self, other])

    def _generateDefaultName(self) -> str:
        joined = " | ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        warn_flag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and warn_flag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and warn_flag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example:

    .. testcode::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: 'BLACK'
        - posn: ['100', ',', '120']
          - x: '100'
          - y: '120'
        - shape: 'SQUARE'
        ...

        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
         'posn:', ['50', ',', '80']]
        - color: 'BLUE'
        - posn: ['50', ',', '80']
          - x: '50'
          - y: '80'
        - shape: 'CIRCLE'
        - size: '50'
        ...

        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
         'posn:', ['20', ',', '40']]
        - color: 'GREEN'
        - posn: ['20', ',', '40']
          - x: '20'
          - y: '40'
        - shape: 'TRIANGLE'
        - size: '20'
        ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        # all() of an empty sequence is True, which is also the value wanted
        # when there are no contained expressions
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the required/optional groupings are computed on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``self &= other``; strings are converted with ``_literalStringClass``."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline via the base class, then recompute ``_may_return_empty``."""
        super().streamline()
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match every required expression (and any optional ones) in any order.

        A first pass uses ``try_parse`` to discover the order in which the
        contained expressions match; a second pass re-parses in that order
        to collect the results.

        Raises the :class:`ParseFatalException` found at the greatest
        location if any occurred, or a :class:`ParseException` if a
        required expression never matched.
        """
        if self.initExprGroups:
            opt_wrappers = [e for e in self.exprs if isinstance(e, Opt)]
            # map the id of each Opt's inner expression back to the Opt itself
            self.opt1map = {id(e.expr): e for e in opt_wrappers}
            inner_optionals = [e.expr for e in opt_wrappers]
            empty_capable = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = inner_optionals + empty_capable
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ] + self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = list(self.required)
        pending_optional = list(self.optionals)
        repeatables = list(self.multioptionals)
        match_order: list[ParserElement] = []

        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while True:
            candidates = pending_required + pending_optional + repeatables
            failed.clear()
            fatals.clear()
            for candidate in candidates:
                try:
                    scan_loc = candidate.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = candidate
                    fatals.append(pfe)
                    failed.append(candidate)
                except ParseException:
                    failed.append(candidate)
                else:
                    # record the enclosing Opt when the match was its inner expression
                    match_order.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            # stop once a full pass produces no new match
            if len(failed) == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order.extend(
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        )

        total_results = ParseResults([])
        for matched in match_order:
            loc, results = matched._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return "{" + " & ".join(str(e) for e in self.exprs) + "}"
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to a parser element
            literal_text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(literal_text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(literal_text)
            else:
                expr = literal_cls(Literal(literal_text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return
        # copy parsing-related settings from the wrapped expression
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the contained expression in a list, or an empty list if there is none."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse using the contained expression.

        :class:`ParseSyntaxException` is re-raised untouched; any other
        :class:`ParseBaseException` has its empty ``pstr``, ``loc`` and
        ``parser_element`` fields filled in before being re-raised.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        the contained expression.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy of the contained expression rather than the original object
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        the contained expression.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy of the contained expression rather than the original object
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        # a Suppress that is already registered is not added a second time
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then the contained expression."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        if self.expr is not None:
            self.expr._checkRecursion(parseElementList + [self])

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a :class:`DeprecationWarning` and then checks for recursion."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = ([] if validateTrace is None else validateTrace) + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).

    Example:

    .. testcode::

        '''
        BNF:
        statement ::= assignment_stmt | if_stmt
        assignment_stmt ::= identifier '=' rvalue
        rvalue ::= identifier | integer
        if_stmt ::= 'if' bool_condition block
        block ::= ([indent] statement)...
        identifier ::= [A..Za..z]
        integer ::= [0..9]...
        bool_condition ::= 'TRUE' | 'FALSE'
        '''

        IF, TRUE, FALSE = Keyword.using_each("IF TRUE FALSE".split())

        statement = Forward()
        identifier = Char(alphas)
        integer = Word(nums).add_parse_action(lambda t: int(t[0]))
        rvalue = identifier | integer
        assignment_stmt = identifier + "=" + rvalue

        if_stmt = IF + (TRUE | FALSE) + IndentedBlock(statement)

        statement <<= Group(assignment_stmt | if_stmt)

        result = if_stmt.parse_string('''
            IF TRUE
                a = 1000
                b = 2000
                IF FALSE
                    z = 100
        ''')
        print(result.dump())

    .. testoutput::

        ['IF', 'TRUE', [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]]
        [0]:
          IF
        [1]:
          TRUE
        [2]:
          [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]
          [0]:
            ['a', '=', 1000]
          [1]:
            ['b', '=', 2000]
          [2]:
            ['IF', 'FALSE', [['z', '=', 100]]]
            [0]:
              IF
            [1]:
              FALSE
            [2]:
              [['z', '=', 100]]
              [0]:
                ['z', '=', 100]
    """

    class _Indent(Empty):
        """Empty match that succeeds only at column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that succeeds only at a column greater than ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column used to build the trailing-undent matcher in parseImpl
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Build a block expression anchored at the current column and parse with it."""
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)

        inner_expr = Empty() + self._Indent(indent_col) + self.expr
        if self._recursive:
            # optionally follow each statement with a more deeply indented block
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(self._IndentGreater(indent_col) + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        block_expr = Group(block) if self._grouped else block
        return (block_expr + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )
class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the contained expression only when ``loc`` is 0."""
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")
class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example:

    .. testcode::

        test = '''\
        BBB this line
        BBB and this line
          BBB but not this one
        A BBB and definitely not this one
        '''

        for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['BBB', ' this line']
        ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the contained expression only when ``loc`` is in column 1."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example:

    .. testcode::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(
            label + Suppress(':')
            + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
        )

        attr_expr[1, ...].parse_string(
            "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the lookahead expression and return the original ``loc``."""
        # parse with self.expr._parse, then empty the token list of the returned
        # ParseResults in place; any named results defined in the lookahead
        # expression are kept
        _, lookahead_results = self.expr._parse(instring, loc, do_actions=do_actions)
        del lookahead_results[:]

        return loc, lookahead_results
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False

        # for expressions with a known length, the lookbehind distance is
        # taken from the expression instead of the ``retreat`` argument
        exact = True
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            exact = False

        self.exact = exact
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action empties the token list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Verify that the contained expression matches text ending at ``loc``.

        Returns the unchanged ``loc`` and the lookbehind results; raises a
        :class:`ParseBaseException` if no lookbehind match is found.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        window_len = len(window)
        last_failure: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )

        # NOTE(review): when loc > retreat the final offset is one more than
        # the window length, giving a start position of -1 - confirm intended
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored_expr._parse(window, window_len - offset)
            except ParseBaseException as pbe:
                last_failure = pbe
            else:
                return loc, ret

        raise last_failure
class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression and wrap its tokens as ``[start, tokens, end]``."""
        start = loc
        end, tokens = self.expr._parse(instring, start, do_actions, callPreParse=False)
        located = ParseResults([start, tokens, end])
        located["locn_start"] = start
        located["value"] = tokens
        located["locn_end"] = end
        # must return as a list, so that the name will be attached to the complete group
        return end, ([located] if self.resultsName else located)
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # set the flag directly instead of calling self.leave_whitespace(),
        # which would propagate to the contained exprs
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens unless the contained expression can parse at ``loc``."""
        if not self.expr.can_parse_next(instring, loc, do_actions=do_actions):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition of a contained expression,
    with an optional ``stop_on`` sentinel expression.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        legacy_stop_on: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr)
        self.saveAsList = True
        ender = legacy_stop_on or stop_on
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.stopOn(ender)

    def stop_on(self, ender) -> ParserElement:
        """Set the sentinel expression; ``None`` clears it. Returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    stopOn = stop_on

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expression once, then as many more times as possible."""
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        not_ender = self.not_ender
        try_not_ender = None if not_ender is None else not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if try_not_ender is not None:
            try_not_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if try_not_ender is not None:
                    try_not_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, preloc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # a failed repetition (or a sentinel match) ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            # first contained expression that carries its own results name
            named = next(
                (
                    e
                    for e in [self.expr] + self.expr.recurse()
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and diag not in e.suppress_warnings_
                ),
                None,
            )
            if named is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {named.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word).set_parse_action(' '.join))

        >>> text = "shape: SQUARE posn: upper left color: BLACK"

        # Fail! read 'posn' as data instead of next label
        >>> attr_expr[1, ...].parse_string(text).pprint()
        [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore
        # to avoid reading label string as part of the data
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(
        ...         data_word, stop_on=label).set_parse_action(' '.join))
        >>> OneOrMore(attr_expr).parse_string(text).pprint() # Better
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        >>> (attr_expr * (1,)).parse_string(text).pprint()
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        return "{" + str(self.expr) + "}..."
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        legacy_stop_on: Union[ParserElement, str] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr, stop_on=legacy_stop_on or stop_on)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse like the base class, returning empty results at ``loc`` on failure."""
        try:
            matched = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            return loc, ParseResults([], name=self.resultsName)
        return matched

    def _generateDefaultName(self) -> str:
        return "[" + str(self.expr) + "]..."
class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example:

    .. doctest::

        >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
        ParseResults(['aa', 'bb', 'cc'], {})
        >>> DelimitedList(Word(hexnums), delim=':', combine=True
        ...     ).parse_string("AA:BB:CC:DD:EE")
        ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the repetition bounds before building anything
        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        # delimiters are suppressed from the results unless combining
        self.delim = delim if combine else Suppress(delim)
        self.combine = combine
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one leading element, followed by (delimiter + element) repetitions
        extra_max = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            extra_max,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        content_expr = self.content.streamline()
        return f"{content_expr} [{self.raw_delim} {content_expr}]..."
5875class _NullToken:
5876 def __bool__(self):
5877 return False
5879 def __str__(self):
5880 return ""
class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    :param expr: expression to be matched if present
    :param default: (optional) - value to be returned
        if the optional expression is not found.

    Example:

    .. testcode::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
        98765-
             ^
        ParseException: Expected end of text, found '-'  (at char 5), (line:1, col:6)
        FAIL: Expected end of text, found '-'  (at char 5), (line:1, col:6)
    """

    # sentinel used as the ``default`` argument when no default value is given
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression, or return the default (if any) at ``loc``."""
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                # no default was supplied - contribute nothing
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                # expose the default under the inner expression's results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] + inner[-1] == "{}":
            inner = inner[1:-1]
        return "[" + inner + "]"
5966Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param other: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned
      as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
      (typically quoted strings and comments)
      that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
      are not allowed to be included in the skipped text;
      if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example:

    .. testcode::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints:

    .. testoutput::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # legacy camelCase keyword, still accepted through **kwargs
        failOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "failOn", None
        )

        super().__init__(other)
        # the legacy keyword wins if both spellings were supplied
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # private helper expression that only carries the ignorable expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.

        Returns ``self`` so that calls can be chained, as the other
        ``ignore`` implementations in this module do.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        startloc = loc
        instrlen = len(instring)
        # bind the callables used inside the scan loop to locals
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real so its tokens (and actions) are included
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    instance using the ``'<<'`` operator.

    .. Note::

       Take care when assigning to ``Forward`` not to overlook
       precedence of operators.

       Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

           fwd_expr << a | b | c

       will actually be evaluated as::

           (fwd_expr << a) | b | c

       thereby leaving b and c out as parseable alternatives.
       It is recommended that you explicitly group the values
       inserted into the :class:`Forward`::

           fwd_expr << (a | b | c)

       Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :meth:`ParseResults.pprint` for an example of a recursive
    parser created using :class:`Forward`.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember the stack entry of the code creating this Forward; __del__ uses
        # its filename/lineno when warning about a Forward that never got an expression
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' call; compared against in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        # Attach ``other`` as the contained expression and mirror several of its
        # attributes onto this Forward.
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # record where '<<' was invoked, for the '<<' / '|' precedence warning in __or__
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        # '<<=' : only handles ParserElement operands here, everything else
        # is answered with NotImplemented
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        # Warn when the caller's stack entry equals the one recorded by the last
        # '<<' (see the precedence note in the class docstring), unless the
        # diagnostic is disabled or suppressed on this element.
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # report the warning at the location saved in __init__
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                # none of the entry points found in the inspected frames
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                # memo keys are (location, element, do_actions)
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                # seed the memo with a failure positioned before ``loc``
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            # store the failure under both keys before propagating it
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class.
        """
        # only this element's flag is changed; ``recursive`` is not used here
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class.
        """
        # only this element's flag is changed; ``recursive`` is not used here
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        if not self.streamlined:
            # set the flag before descending into the contained expression
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        # descend only if this element is not already part of the trace
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                # cap the embedded text at 1000 characters
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        if self.expr is not None:
            return super().copy()
        else:
            # no expression attached yet: return a new Forward whose
            # expression is this (still empty) Forward
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # Optionally warn about naming a Forward that has no expression yet,
        # then defer to the base class implementation.
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class TokenConverter(ParseElementEnhance):
    """
    Common base for :class:`ParseElementEnhance` subclasses whose job is to
    convert the results parsed by the wrapped expression.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted for signature compatibility only; it is
        # neither forwarded to the base class nor stored.
        super().__init__(expr)
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example:

    .. doctest::

        >>> real = Word(nums) + '.' + Word(nums)
        >>> print(real.parse_string('3.1416'))
        ['3', '.', '1416']

        >>> # will also erroneously match the following
        >>> print(real.parse_string('3. 1416'))
        ['3', '.', '1416']

        >>> real = Combine(Word(nums) + '.' + Word(nums))
        >>> print(real.parse_string('3.1416'))
        ['3.1416']

        >>> # no match when there are internal spaces
        >>> print(real.parse_string('3. 1416'))
        Traceback (most recent call last):
        ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # the legacy keyword takes precedence whenever it was given
        if joinString is None:
            joinString = join_string
        # turn off whitespace skipping for the contained expressions ...
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        # ... but keep it enabled for the Combine itself
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """
        Register an expression (comments, for instance) that is to be ignored
        during matching; can be called several times to register several
        ignorable patterns.
        """
        if not self.adjacent:
            super().ignore(other)
            return self
        ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from an emptied copy of the incoming results
        combined = tokenlist.copy()
        del combined[:]
        pieces = tokenlist._asStringList(self.joinString)
        joined = "".join(pieces)
        combined += ParseResults([joined], modal=self.modalResults)

        if not (self.resultsName and combined.haskeys()):
            return combined
        return [combined]
class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    handy for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore`
    expressions.

    Pass ``aslist=True`` to get the parsed tokens back as a Python list
    instead of a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> ident = Word(alphas)
        >>> num = Word(nums)
        >>> term = ident | num
        >>> func = ident + Opt(DelimitedList(term))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', 'a', 'b', '100']

        >>> func = ident + Group(Opt(DelimitedList(term)))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            items = tokenlist.as_list()
        else:
            items = list(tokenlist)
        return ParseResults.List(items)
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and, at the
    same time, as a dictionary: every element is also reachable under the
    first token of that element. Useful for tabular report scraping when
    the first column can serve as an item key.

    Pass ``asdict=True`` to get the parsed tokens back as a Python dict
    instead of a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')

        >>> attr_expr = (
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join)
        ... )

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> # print attributes as plain groups
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
        # Dict will auto-assign names.
        >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        # access named fields as dict entries, or output as dict
        >>> print(result['shape'])
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for pos, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            # the first token of each entry becomes its key
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if entry_len == 1:
                # key without any value
                tokenlist[key] = _ParseResultsWithOffset("", pos)
                continue

            if entry_len == 2 and not isinstance(entry[1], ParseResults):
                # key with a single plain value
                tokenlist[key] = _ParseResultsWithOffset(entry[1], pos)
                continue

            try:
                remainder = entry.copy()
            except Exception:
                raise TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                ) from None

            del remainder[0]

            # unwrap a lone value unless it carries results names of its own
            has_names = isinstance(remainder, ParseResults) and remainder.haskeys()
            if len(remainder) == 1 and not has_names:
                tokenlist[key] = _ParseResultsWithOffset(remainder[0], pos)
            else:
                tokenlist[key] = _ParseResultsWithOffset(remainder, pos)

        converted = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [converted] if self.resultsName else converted
class Suppress(TokenConverter):
    """Converter that parses an expression but drops its results.

    Example:

    .. doctest::

        >>> source = "a, b, c,d"
        >>> wd = Word(alphas)
        >>> wd_list1 = wd + (',' + wd)[...]
        >>> print(wd_list1.parse_string(source))
        ['a', ',', 'b', ',', 'c', ',', 'd']

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        >>> wd_list2 = wd + (Suppress(',') + wd)[...]
        >>> print(wd_list2.parse_string(source))
        ['a', 'b', 'c', 'd']

        # Skipped text (using '...') can be suppressed as well
        >>> source = "lead in START relevant text END trailing text"
        >>> start_marker = Keyword("START")
        >>> end_marker = Keyword("END")
        >>> find_body = Suppress(...) + start_marker + ... + end_marker
        >>> print(find_body.parse_string(source))
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` wraps a pending skip, which is resolved once the
        # following expression is known (see __add__ / __sub__)
        wrapped = _PendingSkip(NoMatch()) if expr is ... else expr
        super().__init__(wrapped)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # nothing of the matched tokens is kept
        return []

    def suppress(self) -> ParserElement:
        # already suppressing
        return self
6692# XXX: Example needs to be re-done for updated output
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    On entry to the parse action, the decorator writes
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``. On exit it writes ``"<<"`` followed by the returned
    value, or by the exception raised by the parse action (which is then
    re-raised).

    Example:

    .. testsetup:: stderr

        import sys
        sys.stderr = sys.stdout

    .. testcleanup:: stderr

        sys.stderr = sys.__stderr__

    .. testcode:: stderr

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
        :options: +NORMALIZE_WHITESPACE

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
            0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']

    .. versionchanged:: 3.1.0
        Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument: prefix the label with its type name
            owner = type(paArgs[0]).__name__
            label = f"{owner}.{label}"
        sys.stderr.write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {outcome!r})\n")
            return outcome

    z.__name__ = f.__name__
    return z
# convenience constants for positional expressions
# (one shared, explicitly named instance per expression class)
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")
# --- internal grammar used by srange() to parse a regex-style "[...]" set ---

# backslash-escaped punctuation: yields the character following the backslash
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape, written \x41, \X41 or (legacy form) \0x41.
# The hex digits are everything after the x/X marker. They are extracted
# explicitly rather than with str.lstrip(), because lstrip() removes a *set*
# of characters and would also eat leading '0' digits of the value itself
# (e.g. turning \x00 into an empty string) and would not remove an upper-case 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().rpartition("x")[2], 16))
)
# octal escape with a leading \0, e.g. \041
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one member of the set: any of the escapes above, or a single character
# other than backslash and ']'
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# a range such as a-z, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the whole bracket expression: '[' , optional '^' ("negate"), members ("body"), ']'
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
def srange(s: str) -> str:
    r"""Helper for spelling out character sets for :class:`Word`
    construction, using the syntax of regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s; the return value is the
    expanded character set joined into a single string. If the input
    cannot be processed, an empty string is returned. Inside the []'s
    the following may appear:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def _expand(part):
        # plain members are used as-is; a grouped [first, last] pair is
        # expanded into every character of that inclusive range
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return "".join(map(chr, range(first, last + 1)))

    try:
        members = _reBracketExpr.parse_string(s).body
        return "".join(_expand(part) for part in members)
    except Exception:
        # deliberately lenient: any failure yields an empty set
        return ""
def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying ``func`` to every
    element of a :class:`ParseResults` list. Any additional ``args`` are
    passed on to ``func`` after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # name the action after the mapped callable, falling back to the name
    # of its class when the callable itself has no ``__name__``
    fallback_name = getattr(func, "__class__").__name__
    action_name = getattr(func, "__name__", fallback_name)

    def pa(s, l, t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    pa.__name__ = action_name
    return pa
def autoname_elements() -> None:
    """
    Utility for naming many parser elements at once, e.g. before
    generating a railroad diagram with named subdiagrams: every
    :class:`ParserElement` held in a local variable of the calling frame
    that has no custom name yet is named after that variable.
    """

    # sys._getframe may not be implemented in the current Python
    frame_getter = getattr(sys, "_getframe", lambda _: None)
    caller = frame_getter(1)
    if caller is None:
        return

    caller = typing.cast(types.FrameType, caller)
    for var_name, candidate in caller.f_locals.items():
        # only parser elements ...
        if not isinstance(candidate, ParserElement):
            continue
        # ... that were not given a custom name already
        if candidate.customName:
            continue
        candidate.set_name(var_name)
# Double-quoted string. The body may not contain raw newlines, carriage
# returns or lone backslashes; an embedded quote is written doubled ("") and
# a backslash escape is either \x followed by hex digits or a backslash
# followed by any single character other than 'x'.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Same pattern as above, using single quotes as the delimiter.
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above, double-quoted alternative listed first.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Python-style string literals: triple-quoted (""" or ''') and single-line
# forms. Unlike quoted_string, the single-line patterns accept a
# backslash-escaped quote (\" or \') rather than a doubled quote, and the
# four alternatives are joined with ``^`` instead of ``|``.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A quoted string (copied, so quoted_string itself is untouched) prefixed with "u".
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")
# Character sets built with srange() from hex code point ranges
# (0xc0-0xd6, 0xd8-0xf6, 0xf8-0xff and 0xa1-0xbf, 0xd7, 0xf7 respectively).
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement instance bound in this namespace up to this point)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# Compatibility synonyms
# camelCase names kept alongside the snake_case ones defined above: the
# expression synonyms are plain aliases of the same objects, the function
# synonyms are produced by replaced_by_pep8().
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on