Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 43%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# core.py
3#
4from __future__ import annotations
6import collections.abc
7from collections import deque
8import os
9import typing
10from typing import (
11 Any,
12 Callable,
13 Generator,
14 NamedTuple,
15 Sequence,
16 TextIO,
17 Union,
18 cast,
19)
20from abc import ABC, abstractmethod
21from enum import Enum
22import string
23import copy
24import warnings
25import re
26import sys
27from collections.abc import Iterable
28import traceback
29import types
30from operator import itemgetter
31from functools import wraps
32from threading import RLock
33from pathlib import Path
35from .util import (
36 _FifoCache,
37 _UnboundedCache,
38 __config_flags,
39 _collapse_string_to_ranges,
40 _escape_regex_range_chars,
41 _flatten,
42 LRUMemo as _LRUMemo,
43 UnboundedMemo as _UnboundedMemo,
44 replaced_by_pep8,
45)
46from .exceptions import *
47from .actions import *
48from .results import ParseResults, _ParseResultsWithOffset
49from .unicode import pyparsing_unicode
# Alias for the platform's sys.maxsize.
# NOTE(review): presumably used as a "no upper limit" value elsewhere - confirm.
_MAX_INT = sys.maxsize
# The built-in text and bytes types, grouped in one tuple.
# NOTE(review): presumably intended for isinstance() checks - confirm against callers.
str_type: tuple[type, ...] = (str, bytes)
54#
55# Copyright (c) 2003-2022 Paul T. McGuire
56#
57# Permission is hereby granted, free of charge, to any person obtaining
58# a copy of this software and associated documentation files (the
59# "Software"), to deal in the Software without restriction, including
60# without limitation the rights to use, copy, modify, merge, publish,
61# distribute, sublicense, and/or sell copies of the Software, and to
62# permit persons to whom the Software is furnished to do so, subject to
63# the following conditions:
64#
65# The above copyright notice and this permission notice shall be
66# included in all copies or substantial portions of the Software.
67#
68# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
69# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
70# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
71# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
72# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
73# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
74# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
75#
77from functools import cached_property
class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # Label for this family of flags.
    # NOTE(review): presumably consumed by __config_flags when reporting - confirm.
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # Snapshot of the public (non-underscore) names defined so far in this class
    # body.  It reads locals() at this exact point, so it must stay below every
    # flag definition and above any further public names.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # Whitespace-separated list of flag names, split into a list of strings.
    # NOTE(review): presumably these are flags that may not be changed - confirm
    # against __config_flags.
    _fixed_names = """
        collect_all_And_tokens
        """.split()
class __diag__(__config_flags):
    # Holder for the module-wide diagnostic flags; every flag starts out False.
    # The public enum mirror of these names is Diagnostics, and the module-level
    # enable_diag()/disable_diag() helpers toggle them by name.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # Snapshot of the public names defined above (reads locals() at this point,
    # so it must stay below the flag definitions), then the two sub-lists
    # selected by name prefix.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        # Enable every flag whose name starts with "warn"; the debug flag is
        # deliberately not part of _warning_names and is left untouched.
        for name in cls._warning_names:
            cls.enable(name)
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - (not described in the original list;
      judging by its name it presumably warns when ``<<`` is combined with a ``|``
      :class:`MatchFirst` expression - to be confirmed)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # Member names must match the flag attribute names on __diag__ exactly:
    # enable_diag()/disable_diag() look the flag up via the member's .name.
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag.

    The flag is selected by the *name* of the given :class:`Diagnostics` member.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag.

    The flag is selected by the *name* of the given :class:`Diagnostics` member.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning (see :class:`Diagnostics`).

    Simply delegates to ``__diag__.enable_all_warnings()``.
    """
    diag_flags = __diag__
    diag_flags.enable_all_warnings()
# hide abstract class: __compat__ and __diag__ above have already been created
# from it, so the imported base name is removed from this module's namespace
del __config_flags
def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether pyparsing's diagnostic warnings should be switched on.

    The starting answer is the truthiness of ``warn_env_var``.  Each entry of
    ``cmd_line_warn_options`` is then read as a colon-separated
    ``action:message:category:module:line`` spec (missing fields count as
    empty), and the entries are applied in order, later ones overriding
    earlier ones:

    - an action starting with "i" (any case) disables warnings when the module
      field is ``"pyparsing"`` or empty;
    - any other action enables warnings when the module field is
      ``"pyparsing"``, or when message, category and module are all empty.
    """
    result = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with colons so that the first fields are always present
        fields = (option + "::::").split(":")
        action, message, category, module = fields[:4]
        if action.lower().startswith("i"):
            if module in ("pyparsing", ""):
                result = False
        elif module == "pyparsing" or not (message or category or module):
            result = True
    return result
# At import time, consult the interpreter's warning options (sys.warnoptions)
# and the PYPARSINGENABLEALLWARNINGS environment variable; when
# _should_enable_warnings() says so, switch on every diagnostic warning.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()
# build list of single arg builtins, that can be used as parse actions
# (_trim_arity checks membership in this set and, for a member, calls it with
# only the tokens argument instead of probing its arity)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on
# Alias for the built-in generator type.
_generatorType = types.GeneratorType
# Return shape of ParserElement.parseImpl: (location, tokens).
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# A condition callable may take 0 to 3 trailing arguments of (s, loc, toks)
# and returns a bool.
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# Signatures of the optional hooks stored on a ParserElement.  "ParserElement"
# is quoted because the class is defined later in this module.
# fail action: (instring, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug "try" action: (instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# debug "match" action: (instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# debug "fail" action: (instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]
# Convenience character-set strings.
# ASCII letters, upper case first.
alphas: str = string.ascii_uppercase + string.ascii_lowercase
# Identifier start/body characters, taken from the Latin1 set of pyparsing_unicode.
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
# Decimal digits plus A-F in both cases.
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# Every character of string.printable that is not whitespace.
printables: str = "".join([c for c in string.printable if c not in string.whitespace])
class _ParseActionIndexError(Exception):
    """
    Internal carrier for an IndexError raised by a user parse action.

    Wrapping keeps such errors distinguishable from IndexErrors raised inside
    ParserElement parseImpl methods.  The description is stored as ``msg`` and
    the wrapped exception as ``exc``.
    """

    def __init__(self, msg: str, exc: BaseException) -> None:
        # keep the original exception first, then its description
        self.exc: BaseException = exc
        self.msg: str = msg
# Module-level caches filled in lazily by the first call to _trim_arity():
# the stack entry of the reference line inside _trim_arity, and the
# (filename, line number) pair synthesized for the call to the user's function.
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is always called with the full argument list and
    forwards only the trailing arguments that ``func`` accepts.  The wrapper
    first calls ``func`` with all arguments; each time that call raises a
    TypeError originating from the call line itself, one more leading argument
    is dropped (up to ``max_limit``) and the call is retried.  After the first
    successful call the discovered offset is reused without further probing.

    Members of ``_single_arg_builtins`` are not probed: they are wrapped so
    that only the third argument is passed.

    An IndexError raised by ``func`` is re-raised as
    :class:`_ParseActionIndexError`; any other TypeError is propagated.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments currently being dropped, and whether a call
    # has already succeeded with that number
    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # compare the (filename, lineno) of the last extracted
                    # traceback entry with the synthesized call line: equal
                    # means the TypeError came from the call itself, not from
                    # code running inside func
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a boolean predicate so that it can be used as a parse action.

    Useful wherever a parse action is required and
    :class:`ParserElement.add_condition` cannot be used (such as when adding a
    condition to an operator level in :class:`infix_notation`).  The returned
    action raises when the predicate returns a falsy value and otherwise
    returns ``None``.

    Optional keyword arguments:

    - ``message`` - custom message for the raised exception; when ``None`` a
      generic message is used
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing
      immediately; otherwise raise :class:`ParseException`
    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message

    exc_type = ParseException
    if fatal:
        exc_type = ParseFatalException

    # accept predicates taking anywhere from 0 to 3 arguments
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if bool(fn(s, l, t)):
            return
        raise exc_type(s, l, msg)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "about to try a match" debug hook.

    Prints the expression and location (with line and column), the text of the
    current line, and a caret under the current column.  The first line is
    prefixed with ``*`` when ``cache_hit`` is true.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report = (
        f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
        f" {line(loc, instring)}\n"
        f" {'^':>{col(loc, instring)}}"
    )
    print(report)
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default "match succeeded" debug hook.

    Prints the expression and the matched tokens as a list, prefixed with
    ``*`` when ``cache_hit`` is true.  The string and location arguments are
    accepted but not used.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report = f"{cache_hit_str}Matched {expr} -> {toks.as_list()}"
    print(report)
def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default "match failed" debug hook.

    Prints the expression together with the type name and text of the raised
    exception, prefixed with ``*`` when ``cache_hit`` is true.  The string and
    location arguments are accepted but not used.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report = f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}"
    print(report)
def null_debug_action(*args):
    """No-op debug action: accepts any positional arguments and does nothing,
    so that it can be installed to suppress debugging output during parsing."""
    return None
389class ParserElement(ABC):
390 """Abstract base level parser element class."""
# Characters skipped as whitespace by newly created elements: space, newline,
# tab and carriage return.  Replaced by set_default_whitespace_chars().
DEFAULT_WHITE_CHARS: str = " \n\t\r"
verbose_stacktrace: bool = False
# Class used for string literals included in a parser; assigned by
# inline_literals_using().  Starts out as None.
_literalStringClass: type = None  # type: ignore[assignment]
@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of whitespace characters.

    The new value is stored on :class:`ParserElement` and is also applied to
    every built-in expression of this module that still follows the default.

    Example::

        # default whitespace chars are space, <TAB> and newline
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # refresh the expressions predefined in this module; each one gets its
    # own fresh set
    following_default = (
        builtin for builtin in _builtin_exprs if builtin.copyDefaultWhiteChars
    )
    for builtin in following_default:
        builtin.whiteChars = set(chars)
@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class used to include string literals in a parser.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class
@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Lazily yield ``cls(obj, **class_kwargs)`` for each ``obj`` in ``seq``.

    Example::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    """
    for obj in seq:
        yield cls(obj, **class_kwargs)
class DebugActions(NamedTuple):
    # Bundle of the three optional debug hooks of an element; the constructor
    # of ParserElement stores DebugActions(None, None, None).
    # called before a match is attempted
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match
    debug_match: typing.Optional[DebugSuccessAction]
    # called when matching (or a parse action) raises
    debug_fail: typing.Optional[DebugExceptionAction]
def __init__(self, savelist: bool = False) -> None:
    """
    Set up the state common to every parser element.

    ``savelist`` becomes the initial value of ``saveAsList``; every other
    attribute starts from a fixed default.
    """
    # --- actions and hooks
    self.parseAction: list[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    self.callDuringTry = False
    # custom debug actions
    self.debugActions = self.DebugActions(None, None, None)
    self.debug = False

    # --- naming and results
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    self.saveAsList = savelist
    # mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True

    # --- whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs: list[ParserElement] = []
    # avoid redundant calls to preParse
    self.callPreparse = True

    # --- parsing behaviour
    # used when checking for left-recursion
    self._may_return_empty = False
    self.streamlined = False
    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg: Union[str, None] = ""

    # --- diagnostics and diagramming
    self.suppress_warnings_: list[Diagnostics] = []
    self.show_in_diagram = True
@property
def mayReturnEmpty(self):
    """Camel-case accessor for the ``_may_return_empty`` attribute."""
    current = self._may_return_empty
    return current

@mayReturnEmpty.setter
def mayReturnEmpty(self, value):
    # store the value unchanged on the underlying attribute
    self._may_return_empty = value
def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
    """
    Record that the given diagnostic should not be reported for this expression.

    Returns ``self`` so that calls can be chained.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parse_string("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self
def visit_all(self):
    """Generator over this expression and all of its sub-expressions,
    breadth-first, each yielded exactly once.  Typically just for internal use.
    """
    pending = deque([self])
    visited = set()
    while pending:
        node = pending.popleft()
        # recursive grammars reach the same expression again; visit it only once
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node
def copy(self) -> ParserElement:
    """
    Return a copy of this :class:`ParserElement`.

    The copy has its own lists of parse actions and ignore expressions, so
    different parse actions can be attached to the same parsing pattern.  If
    this element follows the default whitespace characters, the copy picks up
    the current default.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # the shallow copy shares these lists with the original; give it its own
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate
def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> ParserElement:
    """
    Attach a results name, under which the matched tokens can be referenced
    as a nested attribute of the returned parse results.

    Results names normally behave like dict keys: a later value replaces an
    earlier one.  To keep every value captured for a name, pass
    ``list_all_matches`` = True (``listAllMatches`` is accepted as an
    equivalent keyword).

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object,
    so that one basic element, such as an integer, can be referenced in
    several places under different names.

    The abbreviated syntax ``expr("name")`` can be used in place of
    ``expr.set_results_name("name")`` - see :class:`__call__`.  If
    ``list_all_matches`` is required, use ``expr("name*")``.

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # either spelling of the flag switches on list-all-matches
    keep_all = listAllMatches or list_all_matches
    return self._setResultsName(name, keep_all)
def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """
    Return a copy of this element carrying the results name ``name``.

    A ``None`` name returns ``self`` unchanged.  A single trailing ``*`` is
    stripped from the name and forces list-all-matches behaviour.
    """
    if name is None:
        return self

    named_copy = self.copy()
    keep_all = list_all_matches
    if name.endswith("*"):
        name, keep_all = name[:-1], True
    named_copy.resultsName = name
    named_copy.modalResults = not keep_all
    return named_copy
def set_break(self, break_flag: bool = True) -> ParserElement:
    """
    Enable or disable entering the Python debugger just before this element
    is parsed.  Pass ``break_flag`` as ``True`` to enable, ``False`` to
    disable.  Returns ``self``.
    """
    if not break_flag:
        # undo a previous set_break(True), if there was one
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod  # type: ignore [method-assign]
        return self

    _parseMethod = self._parse

    def breaker(instring, loc, do_actions=True, callPreParse=True):
        # this call to breakpoint() is intentional, not a checkin error
        breakpoint()
        return _parseMethod(instring, loc, do_actions, callPreParse)

    # remember the wrapped method so that it can be restored later
    breaker._originalParseMethod = _parseMethod  # type: ignore [attr-defined]
    self._parse = breaker  # type: ignore [method-assign]
    return self
def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Replace this expression's parse actions with the given callables.

    Parse actions run when the expression matches successfully.  They can
    convert data, perform extra validation, update external data structures,
    or enhance or replace the parsed tokens.  Each parse action ``fn`` is a
    callable taking 0-3 arguments, called as ``fn(s, loc, toks)`` ,
    ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens are passed as ParseResults and may be modified in place, using
    list-style append, extend, and pop operations for the parsed list
    elements, and dictionary-style item set and del operations for named
    results.  Tokens modified in place do not need to be returned.

    A parse action can also replace the tokens entirely, by returning another
    ``ParseResults`` object or some entirely different object (common for
    parse actions that perform data conversions).  A convenient way to build a
    new parse result is to define the values using a dict, and then create the
    return value using :class:`ParseResults.from_dict`.

    If None is passed as the ``fn`` parse action, all previously added parse
    actions for this expression are cleared.

    Optional keyword arguments:

    - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during
      lookaheads and alternate testing. For parse actions that have side effects, it is
      important to only call the parse action once it is determined that it is being
      called as part of a successful parse. For parse actions that perform additional
      validation, then call_during_try should be passed as True, so that the validation
      code is included in the preliminary "try" parses.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See :class:`parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Raises ``TypeError`` if any of the given actions is not callable.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None means "remove every parse action"
    if fns == (None,):
        self.parseAction.clear()
        return self

    for candidate in fns:
        if not callable(candidate):
            raise TypeError("parse actions must be callable")

    self.parseAction[:] = [_trim_arity(fn) for fn in fns]
    legacy_flag = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_flag)

    return self
def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Append one or more parse actions to the expression's existing list of
    parse actions. See :class:`set_parse_action`.

    ``call_during_try`` can only be switched on here: once it is set it stays
    set.  Returns ``self``.

    See examples in :class:`copy`.
    """
    trimmed = [_trim_arity(fn) for fn in fns]
    self.parseAction += trimmed

    requested = kwargs.get("call_during_try", kwargs.get("callDuringTry", False))
    self.callDuringTry = self.callDuringTry or requested
    return self
def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement:
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception; when
      omitted (or ``None``), the default message of :func:`condition_as_parse_action`
      is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Returns ``self`` so that calls can be chained.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    # Only stringify a message that was actually supplied.  Converting a
    # missing message with str() would hand the literal text "None" to
    # condition_as_parse_action and suppress its default message.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    # the flag can only be switched on here, never back off
    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self
def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
    """
    Install the action to perform when parsing fails at this expression.

    The fail action ``fn`` is a callable taking the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value.  It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns ``self``."""
    handler = fn
    self.failAction = handler
    return self
def _skipIgnorables(self, instring: str, loc: int) -> int:
    """
    Advance ``loc`` past any text matched by this element's ignore expressions.

    Every ignore expression is applied repeatedly until it stops matching, and
    the whole set is re-run for as long as a pass both matched something and
    moved the location.  Returns the resulting location.
    """
    if not self.ignoreExprs:
        return loc

    skippers = [ignorable._parse for ignorable in self.ignoreExprs]
    previous_loc = loc
    while True:
        matched_any = False
        for skip in skippers:
            try:
                while True:
                    loc, _ = skip(instring, loc)
                    matched_any = True
            except ParseException:
                pass
        # stop when nothing matched, or when the matches did not actually
        # advance the parse location
        if not matched_any or loc == previous_loc:
            return loc
        previous_loc = loc
def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should start: first skip any
    ignorable expressions, then (if whitespace skipping is on) any leading
    characters found in ``whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end:
        if instring[loc] not in skippable:
            break
        loc += 1
    return loc
def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Base implementation: match nothing, returning the unchanged location
    together with a new empty token list."""
    no_tokens = []
    return loc, no_tokens
def postParse(self, instring, loc, tokenlist):
    """Base implementation of the post-match hook: hand back ``tokenlist``
    itself, unchanged."""
    unchanged = tokenlist
    return unchanged
# @profile
def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Match this expression against ``instring`` at ``loc`` without any caching.

    Steps: optional pre-parse (skipping ignorables/whitespace), ``parseImpl``,
    ``postParse``, packaging of the tokens into a :class:`ParseResults`, and
    finally the parse actions (run when ``do_actions`` or ``callDuringTry`` is
    true).  A parse action that returns a value other than ``None`` or the
    token object it was given replaces the results.

    When ``self.debug`` is set, or a fail action is installed, the debug
    hooks and the fail action are invoked around the match attempt.

    Returns ``(new_loc, results)``.  An IndexError from ``parseImpl`` (in the
    guarded case) is converted to a :class:`ParseException` positioned at the
    end of the input; an IndexError from a parse action is converted to a
    :class:`ParseException` chained to it.  Other exceptions propagate.
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        # Give the failure hooks a location to report even when preParse
        # itself raises; without this the handler below would hit an unbound
        # local and mask the real exception.
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            # guard parseImpl against IndexError only when it can happen
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # same matching steps as above, without any hook overhead
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a returned replacement is re-packaged as the new results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a returned replacement is re-packaged as the new results
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens
def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return the location just past it.

    A :class:`ParseFatalException` is re-raised as is when ``raise_fatal`` is
    true; otherwise it is replaced by an ordinary :class:`ParseException` for
    this expression at ``loc``.
    """
    try:
        end_loc = self._parse(instring, loc, do_actions=do_actions)[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return end_loc
def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """Report whether this expression matches at ``loc``: ``True`` when
    ``try_parse`` succeeds, ``False`` when it raises :class:`ParseException`
    or IndexError."""
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
        return True
    except (ParseException, IndexError):
        return False
# cache for left-recursion in Forward references
# NOTE(review): recursion_lock is presumably held while recursion_memos is
# consulted/updated; the code that uses it is outside this section - confirm.
recursion_lock = RLock()
# Starts out as a plain dict; enable_left_recursion() replaces it with a memo
# object, and reset_cache() clears it.
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}
class _CacheType(typing.Protocol):
    """
    Class to be used for packrat and left-recursion caching of results
    and exceptions.
    """

    # value that get() hands back for a key with no entry; _parseCache tests
    # for it with ``is``
    not_in_cache: bool

    def get(self, *args) -> typing.Any: ...

    def set(self, *args) -> None: ...

    def clear(self) -> None: ...
class NullCache(dict):
    """
    Placeholder cache assigned to the ``packrat_cache`` class variable at
    class-definition time. Calling ``enable_packrat()`` swaps it for a real
    _CacheType implementation. All operations are no-ops that return ``None``.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        return None

    def set(self, *args) -> None:
        return None

    def clear(self) -> None:
        return None
# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
# (a NullCache until enable_packrat() installs a real cache)
packrat_cache: _CacheType = NullCache()
# held by _parseCache for the whole lookup-or-parse step
packrat_cache_lock = RLock()
# [hits, misses] - indexed with HIT=0 / MISS=1 in _parseCache
packrat_cache_stats = [0, 0]
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Memoizing front end for ``_parseNoCache``.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``. On a
    miss the expression is parsed and either the result (as
    ``(end_loc, copy_of_results, start_loc)``) or a traceback-free copy of the
    raised :class:`ParseBaseException` is stored. On a hit the stored exception
    is re-raised, or a fresh copy of the stored results is returned.

    Debug actions are invoked on cache hits with ``cache_hit=True``; a
    ``TypeError`` from an action is ignored so that actions written without the
    ``cache_hit`` parameter keep working.

    Returns ``(end_loc, results)``.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store a copy so later mutation of the returned results
                # cannot alter the cached entry
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(tuple[int, ParseResults, int], value)
            # cached layout is (end location, results, start location)
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # success actions take (instring, start, end, ...), the
                    # same order used on the uncached path
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result
# default parse entry point is the uncached one; enable_packrat() rebinds it
# to _parseCache and disable_memoization() restores it
_parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters in place, and clear
    the left-recursion memo table.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()
# which memoization strategy (if any) is active; enable_packrat() and
# enable_left_recursion() each refuse to run while the other flag is set
_packratEnabled = False
_left_recursion_enabled = False
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing and discard their memoized data.

    It is safe to call this when neither Packrat nor Left Recursion is enabled,
    for instance to clear any previous settings before activating one of them.
    """
    ParserElement.reset_cache()
    # back to the uncached parse path with both strategy flags cleared
    ParserElement._left_recursion_enabled = False
    ParserElement._packratEnabled = False
    ParserElement._parse = ParserElement._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Enables "bounded recursion" parsing, which allows for both direct and indirect
    left-recursion. During parsing, left-recursive :class:`Forward` elements are
    repeatedly matched with a fixed recursion depth that is gradually increased
    until finding the longest match.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    Recursion search naturally memoizes matches of ``Forward`` elements and may
    thus skip reevaluation of parse actions during backtracking. This may break
    programs with parse actions which rely on strict ordering of side-effects.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements. Values that are not positive raise
      ``NotImplementedError``.

    Bounded Recursion parsing works similar but not identical to Packrat parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings; without it a ``RuntimeError`` is raised if
    Packrat is currently enabled.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo_table = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo_table = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError(f"Memo size of {cache_size_limit}")

    ParserElement.recursion_memos = memo_table  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similar but not identical to Bounded Recursion parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings. If packrat is already enabled (and ``force``
    is not given) the call does nothing.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if not ParserElement._packratEnabled:
        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        ParserElement._parse = ParserElement._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This function is intended as the primary interface to the
    client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This
    is also equivalent to ending the grammar with :class:`StringEnd`\\ ().

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_consume_all = parse_all or parseAll

    # every top-level parse starts from empty caches
    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if must_consume_all:
            # skip trailing whitespace/ignorables, then insist on end of input
            loc = self.preParse(instring, loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    return tokens
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace: bool = True,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found.  If
    ``overlap`` is specified, then overlapping matches will be reported.

    Additional parameters:

    - ``always_skip_whitespace`` - (default= ``True``) - if true, the text before
      each match attempt is skipped using a separate :class:`Empty` expression
      that shares this expression's ignore expressions and whitespace characters;
      if false, this expression's own ``preParse`` is used
    - ``debug`` - (default= ``False``) - if true, print a dict with the tokens,
      start and end of every match as it is found
    - ``maxMatches`` - pre-PEP8 spelling of ``max_matches``; the smaller of the
      two values is used

    Note that the start and end locations are reported relative to the string
    being parsed.  See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    # use the PEP8 name, as parse_string does
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here - retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): ``nextloc`` (skip position from loc) and
                        # ``nextLoc`` (end of the match) are distinct names; when
                        # skippable text precedes the match the scan resumes at
                        # the END of the match. Kept as-is - confirm intent.
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc - step forward to avoid looping
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string`, to modify matching text with modified tokens that may
    be returned from a parse action.  To use ``transform_string``, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking ``transform_string()`` on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  ``transform_string()`` returns the resulting transformed string.

    Note that this sets ``keepTabs`` on the expression, and leaves it set.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # carry over the untouched text between the previous match and this one
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.as_list())
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)

        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join([str(part) for part in _flatten(non_empty)])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    ``max_matches`` argument, to clip searching after 'n' matches are found.
    ``maxMatches`` is the pre-PEP8 spelling; the smaller of the two limits applies.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    match_limit = min(maxMatches, max_matches)
    try:
        scanner = self.scan_string(
            instring, match_limit, always_skip_whitespace=False, debug=debug
        )
        # keep only the tokens of each (tokens, start, end) triple
        found = [toks for toks, _start, _end in scanner]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``include_separators`` argument (default= ``False``), if the separating
    matching text should be included in the split results.
    ``includeSeparators`` is the pre-PEP8 spelling of ``include_separators``.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        # text between the previous separator and this one
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the last separator
    yield instring[cursor:]
def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement`
    converts them to :class:`Literal`\\ s by default.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented
def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.
    A leading ``...`` becomes a :class:`SkipTo` this expression, named ``"_skipped*"``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented
def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented
def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented
def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` may be used in place of ``None`` in any of these forms.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``ValueError`` for a negative count or for a tuple whose second
    value is smaller than its first; returns ``NotImplemented`` for any
    other kind of operand.
    """
    # normalize a bare ``...`` or a leading ``...`` to an explicit lower bound of 0
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        minElements, optElements = other, 0
    else:
        # treat ``...`` like None, then pad/clip the tuple to exactly (min, max)
        other = tuple(o if o is not Ellipsis else None for o in other)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            # open-ended upper bound
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the count of optional repetitions beyond the minimum
            optElements -= minElements
        else:
            return NotImplemented

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if minElements == optElements == 0:
        return And([])

    if optElements:

        # build nested optionals: Opt(self + Opt(self + ... Opt(self)))
        def makeOptionalList(n):
            if n > 1:
                return Opt(self + makeOptionalList(n - 1))
            else:
                return Opt(self)

        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
1598 def __rmul__(self, other) -> ParserElement:
1599 return self.__mul__(other)
def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    rhs = other
    if isinstance(rhs, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if rhs == "":
            return Opt(self)
        rhs = self._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])
    return NotImplemented
def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented
def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented
def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented
def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented
def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented
def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - wraps this expression in a :class:`NotAny`
    """
    negated = NotAny(self)
    return negated
# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence (__getitem__ below is defined for repetition syntax,
# not for element access)
__iter__ = None
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` if more than two index arguments are given.
    """

    stop_on_defined = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[start : stop_expr] - the slice start is the repetition key
        key, stop_on = key.start, key.stop
        if key is None:
            key = ...
        stop_on_defined = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[m, n : stop_expr] - the slice sits in the second tuple position
        key, stop_on = (key[0], key[1].start), key[1].stop
        stop_on_defined = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # a single non-iterable n means exactly n repetitions: (n, n)
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    ret = self * tuple(key[:2])
    ret = typing.cast(_MultipleMatch, ret)

    if stop_on_defined:
        ret.stopOn(stop_on)

    return ret
1737 def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
1738 """
1739 Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.
1741 If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
1742 passed as ``True``.
1744 If ``name`` is omitted, same as calling :class:`copy`.
1746 Example::
1748 # these are equivalent
1749 userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
1750 userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
1751 """
1752 if name is not None:
1753 return self._setResultsName(name)
1755 return self.copy()
def suppress(self) -> ParserElement:
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that its
    output is suppressed; useful to keep punctuation from cluttering up
    returned output.
    """
    wrapped = Suppress(self)
    return wrapped
def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Enables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
        this implementation only sets the flag on the element itself
    """
    setattr(self, "skipWhitespace", True)
    return self
def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Disables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any);
        this implementation only clears the flag on the element itself
    """
    setattr(self, "skipWhitespace", False)
    return self
def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars. Whitespace skipping is switched on,
    ``chars`` is stored as a set, and ``copy_defaults`` is recorded in
    ``copyDefaultWhiteChars``. Returns ``self``.
    """
    white_chars = set(chars)
    self.skipWhitespace = True
    self.whiteChars = white_chars
    self.copyDefaultWhiteChars = copy_defaults
    return self
def parse_with_tabs(self) -> ParserElement:
    """
    Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters. Sets ``keepTabs`` and returns ``self``.
    """
    setattr(self, "keepTabs", True)
    return self
def ignore(self, other: ParserElement) -> ParserElement:
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    Example::

        patt = Word(alphas)[...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        # wrap a copy so the caller's expression is left untouched
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching:

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

    A falsy value for any action selects the corresponding default action.
    Debugging is switched on for this expression, and ``self`` is returned.
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_failure = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_failure)
    self.debug = True
    return self
def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if recurse:
        # apply the flag (non-recursively) to every expression that visit_all() yields
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self
@property
def default_name(self) -> str:
    """Auto-generated name for this expression, built on first access and cached in ``_defaultName``."""
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The returned string is what the ``default_name`` property caches and returns
    when no value has been stored in ``_defaultName`` yet.
    """
def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a custom name to this expression, which makes debugging and exception
    messages clearer. If `__diag__.enable_debug_on_named_expressions` is set to True,
    assigning a name also enables debug for this expression.

    If `name` is None, any custom name for this expression is cleared, and the debug
    flag is cleared if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Returns ``self``.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name  # type: ignore[assignment]
    # str(self) resolves through the ``name`` property, so it already reflects the new name.
    self.errmsg = f"Expected {str(self)}"

    named = name is not None
    if __diag__.enable_debug_on_named_expressions:
        self.set_debug(named)

    return self
@property
def name(self) -> str:
    """Custom name if one has been set, otherwise the auto-generated default name."""
    if self.customName is None:
        return self.default_name
    return self.customName

@name.setter
def name(self, new_name) -> None:
    # Assignment is routed through set_name() so its side effects are applied too.
    self.set_name(new_name)
def __str__(self) -> str:
    """Return this expression's ``name`` (custom name if set, else the default name)."""
    return self.name
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)
def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default name
    so that it is regenerated on next access. Returns ``self``.
    """
    self._defaultName = None
    self.streamlined = True
    return self
def recurse(self) -> list[ParserElement]:
    """Return the sub-expressions of this element; this implementation has none."""
    no_children: list[ParserElement] = []
    return no_children
def _checkRecursion(self, parseElementList):
    """
    Walk the expressions returned by ``recurse()``, passing each one a new
    list made of ``parseElementList`` followed by this element.
    """
    # Build a fresh list so the caller's list is never mutated.
    path_with_self = parseElementList + [self]
    for child in self.recurse():
        child._checkRecursion(path_with_self)
def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: emits a ``DeprecationWarning`` on every call before running the
    recursion check. ``validateTrace`` is accepted but not used.
    """
    deprecation_note = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(deprecation_note, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression against the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, or a
    filename. When a filename is given, the file is opened with ``encoding``,
    read in full, and closed before parsing starts.

    ``parse_all`` (or its legacy synonym ``parseAll``) is forwarded to
    :class:`parse_string`.
    """
    require_full_parse = parseAll or parse_all
    try:
        # Try file-like usage first; anything lacking .read() is treated as a filename.
        stream = typing.cast(TextIO, file_or_filename)
        file_contents = stream.read()
    except AttributeError:
        filename = typing.cast(str, file_or_filename)
        with open(filename, "r", encoding=encoding) as f:
            file_contents = f.read()

    try:
        return self.parse_string(file_contents, require_full_parse)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object compares equal;
    - a ``str_type`` value compares equal when this expression fully matches it;
    - another ``ParserElement`` compares equal when both have equal instance attributes;
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)
def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of whether this parser accepts a test string. Handy for simple
    inline microtests of sub-expressions while building up a larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if a ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # Both spellings default to True, so setting either one to False disables parse_all.
    require_full_parse = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_full_parse)
    except ParseBaseException:
        return False
    else:
        return True
def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # Merge each PEP8 argument with its legacy camelCase synonym. Flags that
    # default to True are and-ed (either spelling can switch them off); flags
    # that default to False/None are or-ed (either spelling can switch them on).
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # Expression that rewrites a literal backslash-n into a real newline,
    # ignoring occurrences inside quoted strings.
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # Comment lines (and blank lines that follow a comment) are collected
        # and emitted ahead of the next real test.
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # Callback added nothing: show the parsed results,
                        # honoring the caller's full_dump setting.
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # Not every callable has __name__ (e.g. functools.partial);
                    # fall back to repr() so reporting the failure cannot itself fail.
                    pp_name = getattr(postParse, "__name__", repr(postParse))
                    out.append(f"{pp_name} failed: {type(e).__name__}: {e}")
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for this parser and write it out as HTML.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    diagram_options = dict(
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )
    railroad = to_railroad(self, **diagram_options)

    if isinstance(output_html, (str, Path)):
        # a path was given: open the target first, then render into it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))
# Compatibility synonyms
# Each legacy camelCase attribute below is created by calling
# replaced_by_pep8(<legacy name>, <snake_case callable>). The first group is
# additionally wrapped in staticmethod(); the remaining aliases are assigned directly.
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# defaultName is a plain alias of the default_name property (no wrapper).
defaultName = default_name
# fmt: on
class _PendingSkip(ParserElement):
    # Internal placeholder marking the spot where '...' was added to a parser
    # element. When another ParserElement is added to it, the placeholder is
    # replaced with a SkipTo targeting that element.
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.must_skip = must_skip
        self.anchor = expr

    def _generateDefaultName(self) -> str:
        # Render the anchor followed by "..." by borrowing the name of "anchor + Empty()".
        combined = self.anchor + Empty()
        return str(combined).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched: drop the skipped text when nothing (or only "") was skipped
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor absent: replace an empty skip with a note naming the missing anchor
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        anchor_then_skip = self.anchor + skipper().add_parse_action(must_skip)
        skip_only = skipper().add_parse_action(show_skip)
        return (anchor_then_skip | skip_only) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # A bare placeholder can never be parsed; it must first be combined with a target.
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base for
    atomic matching patterns.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # Default name is simply the concrete class's name.
        cls = type(self)
        return cls.__name__
class NoMatch(Token):
    """
    A token that never matches: every parse attempt raises ``ParseException``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Fail unconditionally at the current location.
        raise ParseException(instring, loc, self.errmsg, self)
class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: when Literal itself is requested, substitute a
        # subclass with an optimized parseImpl (Empty for "", _SingleCharLiteral
        # for one character). Subclasses are always instantiated as requested.
        target_cls = cls
        if cls is Literal:
            text = matchString or match_string
            if not text:
                target_cls = Empty
            elif len(text) == 1:
                target_cls = _SingleCharLiteral
        return super().__new__(target_cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        super().__init__()
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Compare the first character before the full startswith() test.
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class Empty(Literal):
    """
    An empty token: matches at any location, consuming no input and producing no tokens.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # Arguments are accepted for signature compatibility with Literal but
        # ignored; the match string is always "".
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        no_tokens = []
        return loc, no_tokens
class _SingleCharLiteral(Literal):
    # Literal specialization for one-character match strings: a single
    # character comparison replaces the startswith() call.
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Register Literal on ParserElement as its _literalStringClass; this has to
# happen here because Literal is only defined after ParserElement.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        """
        Build a keyword matcher for ``match_string``.

        ``matchString`` and ``identChars`` are legacy synonyms for
        ``match_string`` and ``ident_chars``; a truthy legacy value takes
        precedence. Raises ``ValueError`` if the keyword string is empty.
        """
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparison is done in upper case on both the keyword
            # and the identifier characters.
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that neither the character
        before nor the character after the match is an identifier character.

        Returns ``(new_loc, keyword)`` on success. Raises ``ParseException``
        otherwise; when the text matched but a boundary check failed, the
        message and error location point at the offending neighbor character.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    # (message now worded like the three sibling messages)
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )
class CaselessLiteral(Literal):
    """
    Token that matches a specified string while ignoring letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        defining_text = matchString or match_string
        # The base class stores the upper-cased form, used for comparison.
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end_loc = loc + self.matchLen
        if instring[loc:end_loc].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString
class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        # Merge legacy camelCase arguments with their PEP8 counterparts, then
        # delegate to Keyword with caseless matching forced on.
        keyword_text = matchString or match_string
        chosen_ident_chars = identChars or ident_chars
        super().__init__(keyword_text, chosen_ident_chars, caseless=True)
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        # An explicit max_mismatches wins over the legacy maxMismatches synonym.
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the input at ``loc`` with ``match_string`` character by
        character, tolerating up to ``maxMismatches`` differing positions.

        Returns ``(new_loc, results)`` on success, where ``results`` carries
        the matched input text plus the ``original`` and ``mismatches`` named
        results. Raises ``ParseException`` if too many characters differ or
        the remaining input is shorter than ``match_string``.
        """
        start = loc
        instrlen = len(instring)
        maxloc = start + len(self.match_string)

        if maxloc <= instrlen:
            match_string = self.match_string
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(
                zip(instring[loc:maxloc], match_string)
            ):
                if self.caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The loop compared every position, so the match ends exactly at
                # maxloc. Using maxloc (rather than the last loop index + 1) also
                # keeps an empty match_string from consuming an input character.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255 -
      accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        """Build the character sets, length limits and (when possible) an
        equivalent compiled regex for this word.

        The camelCase keyword arguments are alternates for the snake_case
        ones; a truthy camelCase value takes precedence.

        Raises ``ValueError`` if no initial characters are given, if
        ``min < 1``, or if a positive ``max`` is smaller than ``min``.
        """
        # merge the two spellings of each argument
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        # strip excluded characters from both the initial and the body sets
        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set: body characters are the initial characters
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # remembered because parseImpl rejects over-long words only when an
        # explicit max was given
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            # max of 0 means "no upper limit"
            self.maxLen = _MAX_INT

        if exact > 0:
            # exact overrides both min and max (locals are reused below when
            # building the regex repeat spec)
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        # (only attempted when neither character set contains a space)
        if " " not in (self.initChars | self.bodyChars):
            # leading fragment: a single escaped char, or a character class
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # one character class repeated; repeat counts the whole word
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # distinct body class; the leading fragment consumes one
                # character, so body repeat counts are reduced by one
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # regex could not be built; keep the character-scanning parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                # regex compiled: route parsing through the regex implementation
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def _generateDefaultName(self) -> str:
        """Return a default name of the form ``W:(init[, body])`` followed by
        an optional ``{min,max}`` length suffix."""

        def charsAsStr(s):
            # collapse the set to range notation and truncate to 16 characters
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # exactly one character: drop the leading "W:" prefix
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match by scanning characters one at a time (used when no regex
        replaced this method in ``__init__``).

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` when
        the first character, the length limits, or the keyword boundaries
        are not satisfied.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        # never scan beyond maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            # too short
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # an explicit max was given and the word continues past it
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            # keyword mode: must not be adjacent to another body character
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match using the regex compiled in ``__init__``; returns
        ``(end_of_match, matched_text)`` or raises ``ParseException``."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()
class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)`` for
    matching exactly one character drawn from a given string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # a truthy camelCase argument wins over its snake_case counterpart
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ) -> None:
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may be a non-empty string, an already compiled RE
        object (anything with ``pattern`` and ``match`` attributes), or a
        zero-argument callable returning either of those; compilation of
        strings and evaluation of callables is deferred until first use.

        Raises ``ValueError`` for an empty string pattern and ``TypeError``
        for any other kind of ``pattern``.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # as_match takes precedence when both result styles are requested
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    @cached_property
    def re(self) -> re.Pattern:
        """Compiled pattern, created on first access.

        Also makes sure the may-return-empty flag is populated, whichever
        way the pattern was supplied. Raises ``ValueError`` if a string
        pattern fails to compile.
        """
        if self._re is None:
            if callable(self.pattern):
                # replace self.pattern with the string returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, use it as-is
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
            else:
                try:
                    self._re = re.compile(self.pattern, self.flags)
                except re.error as exc:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from exc
                # a freshly compiled pattern always (re)computes the flag
                self._may_return_empty = self._re.match("") is not None

        if self._may_return_empty is None:
            # pre-compiled pattern (given directly or returned by a callable):
            # the flag was never computed, which would leave mayReturnEmpty
            # as None and disable the past-end-of-string guard in parseImpl
            try:
                self._may_return_empty = self._re.match("") is not None
            except TypeError:
                # e.g. a bytes pattern cannot be probed with a str
                self._may_return_empty = False

        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether the pattern can match the empty string."""
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        """Return ``Re:(<pattern repr>)`` with doubled backslashes collapsed."""
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc`` and return ``(end, ParseResults)``, with any
        named groups added as named results."""
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return ``(end, match.groups())``."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return ``(end, match_object)``."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or with a
        callable ``repl`` together with ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # token is a match object: expand the template against it
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                # token is the matched text: re-apply the pattern to substitute
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to re_escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to re_escape an embedded quote
      string (such as SQL's ``""`` to re_escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # two-character escape spellings and the whitespace they stand for
    ws_map = {r"\t": "\t", r"\n": "\n", r"\f": "\f", r"\r": "\r"}

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ) -> None:
        """Build the matching regex and, when unquoting is enabled, the
        scanner regex used to decode escapes in the matched text.

        camelCase keyword arguments are alternates for the snake_case ones.
        Raises ``ValueError`` if either quote string is empty after
        stripping whitespace, or if the generated pattern fails to compile.
        """
        super().__init__()
        # merge the two spellings; the boolean flags default to True, so
        # either spelling being False disables the feature
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of the end quote that is not followed
            # by the remainder of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally stop at line breaks
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                # NOTE: the numeric-escape alternative is deliberately a plain
                # raw string. Inside an f-string the quantifiers {3}, {2} and
                # {4} would be interpolated as the digits 3, 2 and 4 and the
                # pattern would never match real octal/hex/unicode escapes.
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as exc:
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to Regex"
            ) from exc

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        """Describe the quoting delimiters for use in error messages."""
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    @staticmethod
    def _convert_escaped_numerics(s: str) -> str:
        """Decode the text following the backslash of a numeric escape:
        ``0`` (NUL), three octal digits, ``xHH`` or ``uHHHH``; anything
        else is returned unchanged."""
        if s == "0":
            return "\0"
        if s.isdigit() and len(s) == 3:
            return chr(int(s, base=8))
        elif s.startswith(("u", "x")):
            return chr(int(s[1:], base=16))
        else:
            return s

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``; returns ``(end, text)`` where
        ``text`` is unquoted and unescaped if ``unquote_results`` is set.
        Raises ``ParseException`` if there is no match."""
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            self.re_match(instring, loc)
            if instring[loc] == self.first_quote_char
            else None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    convert_numeric = self._convert_escaped_numerics
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_numeric(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret
class CharsNotIn(Token):
    """Token matching a run of characters that are *not* members of a
    given set. Whitespace is matched like any other character unless it
    is listed in the exclusion set (see example). Constructed from a
    string of the disallowed characters plus optional ``min``, ``max``
    and/or ``exact`` lengths. ``min`` defaults to 1 (values < 1 are
    rejected); ``max`` and ``exact`` default to 0, meaning no maximum or
    exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the collapsed form only decides whether to truncate; the raw
        # character string is what gets displayed
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # stop at maxLen characters or the end of the input
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class White(Token):
    """Matcher for significant whitespace. pyparsing grammars normally
    skip whitespace; use this class when a whitespace structure must be
    matched explicitly. Construct with a string of the whitespace
    characters to match (default ``" \\t\\r\\n"``), plus optional
    ``min``, ``max`` and ``exact`` arguments as defined for the
    :class:`Word` class.
    """

    # display names used to build the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws
        # skip only those known whitespace characters we are not matching
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        # self.leave_whitespace()
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        names = [White.whiteStrs[c] for c in self.matchWhite]
        return "".join(names)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # stop at maxLen characters or the end of the input
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class PositionToken(Token):
    """Base class for the position-testing tokens defined below it."""

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True
class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; handy
    when scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        # nothing to skip when already at the target column
        if col(loc, instring) != self.col:
            end = len(instring)
            if self.ignoreExprs:
                loc = self._skipIgnorables(instring, loc)
            # step over whitespace until the target column (or a
            # non-space character / end of input) is reached
            while loc < end:
                if not instring[loc].isspace() or col(loc, instring) == self.col:
                    break
                loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        # consume the text between the current and the target column
        target = loc + self.col - current
        return target, instring[loc:target]
class LineStart(PositionToken):
    r"""Succeeds when the current position is the first column of a line
    in the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # keep a snapshot of the whitespace set before newline is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        # when newline was originally skippable, step over line breaks
        # (and the non-newline whitespace after each) as well
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Succeeds when the current position is at the end of a line in the
    parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # newline must not be skipped as whitespace, it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)
        if loc == end:
            # end of input counts as end of line; no token is produced
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Succeeds when the current position is at the beginning of the
    parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # accept position 0, or a position reached from 0 by skipping only
        # whitespace and ignorables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)
class StringEnd(PositionToken):
    """
    Succeeds when the current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        # exactly at the end: step one past it; already past: stay put
        new_loc = loc + 1 if loc == end else loc
        return new_loc, []
class WordStart(PositionToken):
    r"""Succeeds when the current position is at the beginning of a
    :class:`Word`, i.e. it is not preceded by any character from the
    given set of ``word_chars`` (default= ``printables``). To emulate
    the ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # fall back to the snake_case argument unless the camelCase one
        # was changed from its default
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        chars = self.wordChars
        # position 0 always qualifies; elsewhere the previous character
        # must be a non-word char and the current one a word char
        if loc != 0 and (
            instring[loc - 1] in chars or instring[loc] not in chars
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class WordEnd(PositionToken):
    r"""Succeeds when the current position is at the end of a
    :class:`Word`, i.e. it is not followed by any character from the
    given set of ``word_chars`` (default= ``printables``). To emulate
    the ``\b`` behavior of regular expressions, use
    ``WordEnd(alphanums)``. ``WordEnd`` will also match at the end of
    the string being parsed, or at the end of a line.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # fall back to the snake_case argument unless the camelCase one
        # was changed from its default
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        chars = self.wordChars
        length = len(instring)
        # at or past the end of input always qualifies; elsewhere the
        # current character must be a non-word char and the previous one
        # a word char
        if (
            length > 0
            and loc < length
            and (instring[loc] in chars or instring[loc - 1] not in chars)
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        """Remember ``tag_name`` / ``value`` and register the parse action
        that stores them in the results."""
        super().__init__()
        # what to store, and under which results name
        self.tag_name = tag_name
        self.tag_value = value
        # element flags
        self.show_in_diagram = False
        self.mayIndexError = False
        self._may_return_empty = True
        self.leave_whitespace()
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name in ``tokens``."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        """Build the default name: ``<ClassName>:<tag_name>=<repr(tag_value)>``."""
        return "{}:{}={!r}".format(
            type(self).__name__, self.tag_name, self.tag_value
        )
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the list ``self.exprs``.

        ``exprs`` may be a single string, a single :class:`ParserElement`,
        or an iterable mixing both; every string is wrapped with
        ``self._literalStringClass``.  Any other object is tried as an
        iterable and, failing that, stored as a one-element list.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any strings in the sequence; other items pass through
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Append ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the originals passed in by the caller are untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so the originals passed in by the caller are untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression here and on every
        contained expression; returns ``self``.

        A :class:`Suppress` that is already in ``self.ignoreExprs`` is left
        alone (nothing is added or propagated).
        """
        if not isinstance(other, Suppress) or other not in self.ignoreExprs:
            super().ignore(other)
            # propagate the entry that was just registered on this element
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        """Build the default name from the class name and the contained expressions."""
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this element and its children once, flattening a
        directly nested expression of the same class; returns ``self``."""
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            # the same test on the last element (the list may have grown above)
            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates every
        contained expression, then runs the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this element whose contained expressions are
        themselves copies."""
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and not suppressed) if a contained expression already has a results
        name of its own."""
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, list_all_matches)

        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning per call is enough
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once ``And.parseImpl`` has passed it, failures of
        later elements are raised as :class:`ParseSyntaxException`."""

        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence from ``exprs_arg``: strings are wrapped with
        ``self._literalStringClass`` and each ``...`` becomes a ``SkipTo``
        targeting the element that follows it."""
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(item) if isinstance(item, str) else item
            for item in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:
            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            # pair every element with its successor; the last one is kept as-is
            exprs[:-1] = [
                SkipTo(following)("_skipped*") if current is Ellipsis else current
                for current, following in zip(exprs, exprs[1:])
            ]

        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take the whitespace handling of the leading element
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips into their successors, streamline, attach
        anchor-recording parse actions in front of any ``IndentedBlock``,
        and recompute ``_may_return_empty``; returns ``self``."""

        def ends_with_pending_skip(expr) -> bool:
            return bool(
                isinstance(expr, ParseExpression)
                and expr.exprs
                and isinstance(expr.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
            removed = NoMatch()
            # iterate over a snapshot; self.exprs itself is modified below
            for idx, expr in enumerate(self.exprs[:-1]):
                if expr is removed:
                    continue
                if ends_with_pending_skip(expr):
                    expr.exprs[-1] = expr.exprs[-1] + self.exprs[idx + 1]
                    self.exprs[idx + 1] = removed
            self.exprs = [e for e in self.exprs if e is not removed]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        before: ParserElement
        node: ParserElement
        for before, node in zip(self.exprs, self.exprs[1:]):
            # traverse node or any first embedded expr of node looking for an IndentedBlock
            # (but watch out for recursive grammar)
            visited = set()
            while id(node) not in visited:
                visited.add(id(node))
                if isinstance(node, IndentedBlock):
                    before.add_parse_action(
                        lambda s, l, t, cur_=node: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                first_child = next(iter(node.recurse()), None)
                if first_child is None:
                    break
                node = typing.cast(ParserElement, first_child)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating tokens;
        returns ``(loc, tokens)``."""
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        head, *rest = self.exprs
        loc, collected = head._parse(instring, loc, do_actions, callPreParse=False)
        past_error_stop = False
        for expr in rest:
            # exact type test (not isinstance)
            if type(expr) is And._ErrorStop:
                past_error_stop = True
                continue
            if not past_error_stop:
                loc, expr_tokens = expr._parse(instring, loc, do_actions)
            else:
                # after an error stop, every failure becomes a syntax exception
                try:
                    loc, expr_tokens = expr._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            collected += expr_tokens
        return loc, collected

    def __iadd__(self, other):
        """``self += other``: append ``other`` (strings are wrapped first)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        """Check contained expressions for recursion, stopping after the
        first one that cannot return empty."""
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr._checkRecursion(trail)
            if not expr.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        """Join the names of the contained expressions inside one pair of braces."""
        inner = " ".join(map(str, self.exprs))
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return "{" + inner + "}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Store the alternatives and derive ``_may_return_empty`` (any
        alternative may) and ``skipWhitespace`` (all alternatives do)."""
        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self._may_return_empty = True

    def streamline(self) -> ParserElement:
        """Streamline, then recompute the flags derived from the alternatives."""
        super().streamline()
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            # a White alternative disables whitespace skipping for the whole Or
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative at ``loc`` and return the result of the one
        that consumes the most input.

        Raises the furthest-reaching fatal exception if any alternative
        raised one and no match was returned, otherwise the
        :class:`ParseException` with the greatest ``loc``, otherwise a
        "no defined alternatives to match" :class:`ParseException`.
        """
        # furthest failure seen so far, used for error reporting
        maxExcLoc = -1
        maxException = None
        # (end location, expression) for every alternative that matched
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        # first pass: probe each alternative with try_parse
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error discards any non-fatal failure recorded so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # non-fatal failures are only tracked while no fatal has occurred
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # best (end location, tokens) obtained from a full parse so far
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    # the full parse got at least as far as the probe did
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; ties are broken by the longer
                # string form of the failing element
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if maxExcLoc == parse_start_loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """``self ^= other``: append ``other`` as a further alternative
        (strings are wrapped with ``self._literalStringClass`` first)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        """Join the alternatives' names with ``" ^ "`` inside braces."""
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled
        and not suppressed) if any alternative is an :class:`And`."""
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Store the alternatives and derive ``_may_return_empty`` (any
        alternative may) and ``skipWhitespace`` (all alternatives do)."""
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the alternatives."""
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        # a White alternative disables whitespace skipping for the whole expression
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses at ``loc``.

        A fatal exception from an alternative is re-raised immediately.  If
        every alternative fails, the failure with the greatest ``loc`` is
        raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc, furthest_loc = err, err.loc
            except IndexError:
                end = len(instring)
                if end > furthest_loc:
                    furthest_exc = ParseException(instring, end, alt.errmsg, self)
                    furthest_loc = end

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """``self |= other``: append ``other`` as a further alternative
        (strings are wrapped first)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        """Join the alternatives' names with ``" | "`` inside braces."""
        return "{" + " | ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled
        and not suppressed) if any alternative is an :class:`And`."""
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warnings.warn(
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group",
                stacklevel=3,
            )

        return super()._setResultsName(name, list_all_matches)
class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        """Store the expressions; the element may return empty only if all
        of them may (an empty collection counts as "may")."""
        super().__init__(exprs, savelist)
        # all() over an empty list is True, which covers the no-expression case
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups are computed on the first call to parseImpl
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """``self &= other``: append ``other`` (strings are wrapped first)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline, then recompute ``_may_return_empty``."""
        super().streamline()
        # all() over an empty list is True, which covers the no-expression case
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Find an order in which the contained expressions match, then
        parse them in that order and return ``(loc, combined results)``.

        Raises the furthest-reaching fatal exception seen in the final
        matching round, or a :class:`ParseException` naming the required
        expressions that were never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            opt_wrappers = [e for e in self.exprs if isinstance(e, Opt)]
            self.opt1map = {id(e.expr): e for e in opt_wrappers}
            unwrapped_opts = [e.expr for e in opt_wrappers]
            empty_capable = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = unwrapped_opts + empty_capable
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ] + self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatable = self.multioptionals[:]
        match_order: list[ParserElement] = []

        # keep making rounds over the outstanding expressions until a round
        # in which every one of them fails
        while True:
            candidates = pending_required + pending_optional + repeatable
            failed: list[ParserElement] = []
            fatals: list[ParseFatalException] = []
            for expr in candidates:
                try:
                    probe_loc = expr.try_parse(instring, probe_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = expr
                    fatals.append(pfe)
                    failed.append(expr)
                except ParseException:
                    failed.append(expr)
                else:
                    # record the Opt wrapper (if any) rather than the bare expression
                    match_order.append(self.opt1map.get(id(expr), expr))
                    if expr in pending_required:
                        pending_required.remove(expr)
                    elif expr in pending_optional:
                        pending_optional.remove(expr)
            if len(failed) == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest location first; ties are broken by the longer
                # string form of the failing element
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        # real parse, in the order discovered above
        total_results = ParseResults([])
        for expr in match_order:
            loc, results = expr._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        """Join the contained expressions' names with ``" & "`` inside braces."""
        return "{" + " & ".join(str(e) for e in self.exprs) + "}"
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        """Wrap ``expr`` (a string is first converted to an element) and
        copy its parsing flags onto this element."""
        super().__init__(savelist)
        if isinstance(expr, str_type):
            expr_str = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(expr_str)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(expr_str)
            else:
                expr = literal_cls(Literal(expr_str))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return
        # mirror the wrapped expression's settings
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or ``[]`` if there is none."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate to the wrapped expression, filling in missing details on
        any :class:`ParseBaseException` before re-raising it."""
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            # only falsy fields are replaced
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            # an element with a custom name (other than a Forward) reports its own message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``leave_whitespace`` here and, when ``recursive``, to a copy
        of the wrapped expression; returns ``self``."""
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``ignore_whitespace`` here and, when ``recursive``, to a copy
        of the wrapped expression; returns ``self``."""
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the wrapped
        expression; a :class:`Suppress` already registered is skipped."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and the wrapped expression; returns ``self``."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise :class:`RecursiveGrammarException` if this element is
        already in ``parseElementList``; otherwise check the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        trail = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(trail)

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates the wrapped
        expression, then runs the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = (validateTrace if validateTrace is not None else [])[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Build the default name from the class name and the wrapped expression."""
        return "{}:({})".format(type(self).__name__, self.expr)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Empty match that succeeds only at column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that succeeds only at a column greater than ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        """Wrap ``expr``; ``recursive`` allows nested deeper-indented blocks
        and ``grouped`` wraps the matched block in a ``Group``."""
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column used for the trailing-undent check
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Build a one-off grammar for the indentation found at ``loc`` and
        parse the block with it."""
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        line_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        full_expr = body + Optional(trailing_undent)
        return full_expr.parseImpl(instring, anchor_loc, do_actions)
class AtStringStart(ParseElementEnhance):
    """Matches the wrapped expression only when parsing begins at the very
    first character (location 0) of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # any location other than 0 is, by definition, not the string start
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")
class AtLineStart(ParseElementEnhance):
    r"""Matches the wrapped expression only when the current parse location
    is in column 1 of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # col() is 1-based, so column 1 is the first character of a line
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list.  If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Run the real parse of the contained expression, but throw away where
        # it ended: the caller's location is returned unchanged.
        _end_ignored, lookahead = self.expr._parse(
            instring, loc, do_actions=do_actions
        )
        # Emptying the list part of the results (rather than returning a fresh
        # empty list) keeps any results names defined inside the lookahead.
        del lookahead[:]

        return loc, lookahead
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        # work on a private copy so that disabling whitespace skipping does
        # not leak into the caller's expression
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False
        self.skipWhitespace = False

        # For these expression kinds the lookbehind distance can be read off
        # the expression itself; None means "use the caller's retreat".
        known_width: typing.Optional[int] = None
        if isinstance(expr, str_type):
            known_width = len(typing.cast(str, expr))
        elif isinstance(expr, (Literal, Keyword)):
            known_width = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            known_width = expr.maxLen
        elif isinstance(expr, PositionToken):
            known_width = 0

        self.exact = known_width is not None
        self.retreat = retreat if known_width is None else known_width
        self.errmsg = f"not preceded by {expr}"
        # a lookbehind never contributes tokens: empty the list part of
        # whatever the contained expression returned
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        if self.exact:
            # the distance is known: a single attempt, `retreat` characters back
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # `retreat` is only an upper bound on the window: try successively
        # longer tails of the text before loc, each of which must be consumed
        # completely (hence the StringEnd).
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        window_len = len(window)
        failure: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )
        # NOTE(review): the window holds min(loc, retreat) characters while the
        # largest offset is min(loc, retreat + 1), so when loc > retreat the
        # last attempt is made at start index -1 - confirm this is intended.
        largest_offset = min(loc, self.retreat + 1)

        for offset in range(1, largest_offset + 1):
            try:
                _, ret = anchored_expr._parse(window, window_len - offset)
            except ParseBaseException as pbe:
                # remember the most recent failure in case nothing matches
                failure = pbe
            else:
                return loc, ret

        raise failure
class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end, matched = self.expr._parse(
            instring, begin, do_actions, callPreParse=False
        )

        # positional form is [start, tokens, end]; the same three values are
        # also reachable by name
        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end

        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # Set the flag directly instead of calling self.leave_whitespace():
        # that call would propagate the setting into the contained expression.
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeed (consuming nothing, returning no tokens) exactly when the
        # contained expression could NOT be parsed here
        if not self.expr.can_parse_next(instring, loc, do_actions=do_actions):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching of one expression, with an
    optional sentinel expression that stops the repetition."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        self.saveAsList = True
        # the legacy camelCase keyword wins when both spellings are given
        sentinel = stopOn or stop_on
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or, with ``None``, clear) the sentinel expression that ends
        the repetition; returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated: "not at the sentinel" is what gets checked
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # bind the hot-loop callables to locals once
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        not_ender = self.not_ender
        reject_sentinel = not_ender.try_parse if not_ender is not None else None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_one(instring, loc, do_actions)

        # Keep matching until the sentinel or a failed match raises; `loc` is
        # only rebound by a successful match, so it ends at the last success.
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_one(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            # look for the first contained expression that carries a results
            # name of its own and has not suppressed this diagnostic
            named_inner = next(
                (
                    e
                    for e in [self.expr, *self.expr.recurse()]
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and diag not in e.suppress_warnings_
                ),
                None,
            )
            if named_inner is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {named_inner.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        # rendered as "{expr}..."
        return "{" + str(self.expr) + "}..."
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        # collapse the two spellings of the sentinel keyword before delegating
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        try:
            outcome = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # not even one match: succeed with an empty result at the
            # original location
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        # rendered as "[expr]..."
        return "[" + str(self.expr) + "]..."
class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        ``min`` and ``max``, when given, bound the number of list elements;
        ``min`` must be at least 1 and ``max`` may not be less than ``min``,
        otherwise ``ValueError`` is raised.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the bounds before building anything
        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")
        if min is not None and max is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters stay in the output only when combining into one string
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # first element, followed by (min-1 .. max-1) "delimiter + element" pairs
        fewest_pairs = self.min - 1
        most_pairs = None if self.max is None else self.max - 1
        list_expr = self.content + (self.delim + self.content) * (
            fewest_pairs,
            most_pairs,
        )
        if self.allow_trailing_delim:
            list_expr += Opt(self.delim)
        if self.combine:
            list_expr = Combine(list_expr)

        super().__init__(list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        element = self.content.streamline()
        return f"{element} [{self.raw_delim} {element}]..."
5305class _NullToken:
5306 def __bool__(self):
5307 return False
5309 def __str__(self):
5310 return ""
class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match, at most once
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # private sentinel meaning "no default was supplied"; compared by identity
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        inner_expr = self.expr
        try:
            loc, tokens = inner_expr._parse(
                instring, loc, do_actions, callPreParse=False
            )
        except (ParseException, IndexError):
            # no match: succeed anyway, without moving loc
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif inner_expr.resultsName:
                # expose the default under the contained expression's name too
                tokens = ParseResults([fallback])
                tokens[inner_expr.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        text = str(self.expr)
        # strip off redundant inner {}'s
        while len(text) > 1 and text.startswith("{") and text.endswith("}"):
            text = text[1:-1]
        return "[" + text + "]"


Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        # the legacy camelCase keyword wins when both spellings are given
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # private helper expression whose only job is to hold the ignore
        # expressions that are skipped over while scanning for the target
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's current
        ignore expressions plus the ``ignore`` argument given at construction."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        """Register an ignore expression and refresh the internal ignorer.

        Returns ``self`` so that the call can be chained like the other
        fluent mutators in this module.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        # use the PEP8-named method, as NotAny does, rather than the legacy
        # camelCase synonym
        fail_on_can_parse_next = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if fail_on_can_parse_next is not None:
                # break if failOn expression matches
                # NOTE(review): leaving the loop with `break` bypasses the
                # while/else failure below, so the text skipped so far is
                # returned as a match - confirm against the documented
                # "is not a match" contract of fail_on.
                if fail_on_can_parse_next(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: actions are deferred until the real parse below
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # now parse the target for real, running its actions if requested
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
5544class Forward(ParseElementEnhance):
5545 """
5546 Forward declaration of an expression to be defined later -
5547 used for recursive grammars, such as algebraic infix notation.
5548 When the expression is known, it is assigned to the ``Forward``
5549 variable using the ``'<<'`` operator.
5551 Note: take care when assigning to ``Forward`` not to overlook
5552 precedence of operators.
5554 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::
5556 fwd_expr << a | b | c
5558 will actually be evaluated as::
5560 (fwd_expr << a) | b | c
5562 thereby leaving b and c out as parseable alternatives. It is recommended that you
5563 explicitly group the values inserted into the ``Forward``::
5565 fwd_expr << (a | b | c)
5567 Converting to use the ``'<<='`` operator instead will avoid this problem.
5569 See :class:`ParseResults.pprint` for an example of a recursive
5570 parser created using ``Forward``.
5571 """
5573 def __init__(
5574 self, other: typing.Optional[Union[ParserElement, str]] = None
5575 ) -> None:
5576 self.caller_frame = traceback.extract_stack(limit=2)[0]
5577 super().__init__(other, savelist=False) # type: ignore[arg-type]
5578 self.lshift_line = None
5580 def __lshift__(self, other) -> Forward:
5581 if hasattr(self, "caller_frame"):
5582 del self.caller_frame
5583 if isinstance(other, str_type):
5584 other = self._literalStringClass(other)
5586 if not isinstance(other, ParserElement):
5587 return NotImplemented
5589 self.expr = other
5590 self.streamlined = other.streamlined
5591 self.mayIndexError = self.expr.mayIndexError
5592 self._may_return_empty = self.expr.mayReturnEmpty
5593 self.set_whitespace_chars(
5594 self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
5595 )
5596 self.skipWhitespace = self.expr.skipWhitespace
5597 self.saveAsList = self.expr.saveAsList
5598 self.ignoreExprs.extend(self.expr.ignoreExprs)
5599 self.lshift_line = traceback.extract_stack(limit=2)[-2] # type: ignore[assignment]
5600 return self
5602 def __ilshift__(self, other) -> Forward:
5603 if not isinstance(other, ParserElement):
5604 return NotImplemented
5606 return self << other
5608 def __or__(self, other) -> ParserElement:
5609 caller_line = traceback.extract_stack(limit=2)[-2]
5610 if (
5611 __diag__.warn_on_match_first_with_lshift_operator
5612 and caller_line == self.lshift_line
5613 and Diagnostics.warn_on_match_first_with_lshift_operator
5614 not in self.suppress_warnings_
5615 ):
5616 warnings.warn(
5617 "warn_on_match_first_with_lshift_operator:"
5618 " using '<<' operator with '|' is probably an error, use '<<='",
5619 stacklevel=2,
5620 )
5621 ret = super().__or__(other)
5622 return ret
5624 def __del__(self):
5625 # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
5626 if (
5627 self.expr is None
5628 and __diag__.warn_on_assignment_to_Forward
5629 and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
5630 ):
5631 warnings.warn_explicit(
5632 "warn_on_assignment_to_Forward:"
5633 " Forward defined here but no expression attached later using '<<=' or '<<'",
5634 UserWarning,
5635 filename=self.caller_frame.filename,
5636 lineno=self.caller_frame.lineno,
5637 )
5639 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
5640 if (
5641 self.expr is None
5642 and __diag__.warn_on_parse_using_empty_Forward
5643 and Diagnostics.warn_on_parse_using_empty_Forward
5644 not in self.suppress_warnings_
5645 ):
5646 # walk stack until parse_string, scan_string, search_string, or transform_string is found
5647 parse_fns = (
5648 "parse_string",
5649 "scan_string",
5650 "search_string",
5651 "transform_string",
5652 )
5653 tb = traceback.extract_stack(limit=200)
5654 for i, frm in enumerate(reversed(tb), start=1):
5655 if frm.name in parse_fns:
5656 stacklevel = i + 1
5657 break
5658 else:
5659 stacklevel = 2
5660 warnings.warn(
5661 "warn_on_parse_using_empty_Forward:"
5662 " Forward expression was never assigned a value, will not parse any input",
5663 stacklevel=stacklevel,
5664 )
5665 if not ParserElement._left_recursion_enabled:
5666 return super().parseImpl(instring, loc, do_actions)
5667 # ## Bounded Recursion algorithm ##
5668 # Recursion only needs to be processed at ``Forward`` elements, since they are
5669 # the only ones that can actually refer to themselves. The general idea is
5670 # to handle recursion stepwise: We start at no recursion, then recurse once,
5671 # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
5672 #
5673 # The "trick" here is that each ``Forward`` gets evaluated in two contexts
5674 # - to *match* a specific recursion level, and
5675 # - to *search* the bounded recursion level
5676 # and the two run concurrently. The *search* must *match* each recursion level
5677 # to find the best possible match. This is handled by a memo table, which
5678 # provides the previous match to the next level match attempt.
5679 #
5680 # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
5681 #
5682 # There is a complication since we not only *parse* but also *transform* via
5683 # actions: We do not want to run the actions too often while expanding. Thus,
5684 # we expand using `do_actions=False` and only run `do_actions=True` if the next
5685 # recursion level is acceptable.
5686 with ParserElement.recursion_lock:
5687 memo = ParserElement.recursion_memos
5688 try:
5689 # we are parsing at a specific recursion expansion - use it as-is
5690 prev_loc, prev_result = memo[loc, self, do_actions]
5691 if isinstance(prev_result, Exception):
5692 raise prev_result
5693 return prev_loc, prev_result.copy()
5694 except KeyError:
5695 act_key = (loc, self, True)
5696 peek_key = (loc, self, False)
5697 # we are searching for the best recursion expansion - keep on improving
5698 # both `do_actions` cases must be tracked separately here!
5699 prev_loc, prev_peek = memo[peek_key] = (
5700 loc - 1,
5701 ParseException(
5702 instring, loc, "Forward recursion without base case", self
5703 ),
5704 )
5705 if do_actions:
5706 memo[act_key] = memo[peek_key]
5707 while True:
5708 try:
5709 new_loc, new_peek = super().parseImpl(instring, loc, False)
5710 except ParseException:
5711 # we failed before getting any match - do not hide the error
5712 if isinstance(prev_peek, Exception):
5713 raise
5714 new_loc, new_peek = prev_loc, prev_peek
5715 # the match did not get better: we are done
5716 if new_loc <= prev_loc:
5717 if do_actions:
5718 # replace the match for do_actions=False as well,
5719 # in case the action did backtrack
5720 prev_loc, prev_result = memo[peek_key] = memo[act_key]
5721 del memo[peek_key], memo[act_key]
5722 return prev_loc, copy.copy(prev_result)
5723 del memo[peek_key]
5724 return prev_loc, copy.copy(prev_peek)
5725 # the match did get better: see if we can improve further
5726 if do_actions:
5727 try:
5728 memo[act_key] = super().parseImpl(instring, loc, True)
5729 except ParseException as e:
5730 memo[peek_key] = memo[act_key] = (new_loc, e)
5731 raise
5732 prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek
5734 def leave_whitespace(self, recursive: bool = True) -> ParserElement:
5735 self.skipWhitespace = False
5736 return self
5738 def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
5739 self.skipWhitespace = True
5740 return self
5742 def streamline(self) -> ParserElement:
5743 if not self.streamlined:
5744 self.streamlined = True
5745 if self.expr is not None:
5746 self.expr.streamline()
5747 return self
5749 def validate(self, validateTrace=None) -> None:
5750 warnings.warn(
5751 "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
5752 DeprecationWarning,
5753 stacklevel=2,
5754 )
5755 if validateTrace is None:
5756 validateTrace = []
5758 if self not in validateTrace:
5759 tmp = validateTrace[:] + [self]
5760 if self.expr is not None:
5761 self.expr.validate(tmp)
5762 self._checkRecursion([])
def _generateDefaultName(self) -> str:
    """
    Build the default display name, ``"<ClassName>: <str of expr>"``.

    While the contained expression is being stringified, ``_defaultName``
    is temporarily set to ``": ..."`` so that a self-referential expression
    does not recurse forever.  The previous value is restored in a
    ``finally`` clause, so it is put back even when something that is not
    an ``Exception`` subclass (e.g. ``KeyboardInterrupt``) escapes from
    ``str(self.expr)``.

    :returns: the generated name; the expression text is truncated to
        1000 characters, is ``"None"`` when there is no contained
        expression, and is ``"..."`` when stringifying raised an
        ``Exception``.
    """
    # Avoid infinite recursion by setting a temporary _defaultName
    save_default_name = self._defaultName
    self._defaultName = ": ..."

    try:
        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."
    finally:
        # always undo the sentinel, whatever happened above
        self._defaultName = save_default_name

    return f"{type(self).__name__}: {ret_string}"
def copy(self) -> ParserElement:
    """
    Return a copy of this expression.

    When a contained expression has already been assigned, the inherited
    ``copy()`` is used.  Otherwise a new ``Forward`` is created and this
    element is assigned into it with ``<<=``.
    """
    if self.expr is None:
        # nothing assigned yet: hand back a new Forward that refers to this one
        placeholder = Forward()
        placeholder <<= self
        return placeholder

    return super().copy()
def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """
    Assign a results name, warning first if this element is still empty.

    The warning is emitted only when all three hold: the
    ``warn_name_set_on_empty_Forward`` diagnostic is enabled, it has not
    been suppressed on this element, and no contained expression has been
    assigned yet.  The actual naming is delegated to the parent class.
    """
    # fmt: off
    should_warn = (
        __diag__.warn_name_set_on_empty_Forward
        and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
        and self.expr is None
    )
    if should_warn:
        message = (
            "warn_name_set_on_empty_Forward:"
            f" setting results name {name!r} on {type(self).__name__} expression"
            " that has no contained expression"
        )
        warnings.warn(message, stacklevel=3)
    # fmt: on

    return super()._setResultsName(name, list_all_matches)
# Compatibility synonyms
# Legacy camelCase attribute names, each built by calling replaced_by_pep8
# with the legacy name and the corresponding snake_case method defined above.
# fmt: off
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
# fmt: on
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but not forwarded to the parent constructor.
        super().__init__(expr)  # , savelist)
        # converters start out not saving results as a list; subclasses
        # that need it set this flag themselves
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # the legacy keyword wins whenever it is supplied
        if joinString is None:
            joinString = join_string

        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself (see skipWhitespace below)
        if adjacent:
            self.leave_whitespace()

        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable and return ``self``.

        With ``adjacent`` set, the base ``ParserElement.ignore`` is called
        directly; otherwise the inherited implementation is used.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one joined string token."""
        # start from an emptied copy of the incoming results
        combined = tokenlist.copy()
        del combined[:]

        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper.

        Returns ``[tokenlist]`` by default, or a ``ParseResults.List``
        built from the tokens when ``aslist`` was requested.
        """
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            items = tokenlist.asList()
        else:
            items = list(tokenlist)
        return ParseResults.List(items)
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty sub-token under its first element as key.

        The stored value depends on the sub-token's shape:

        - one element: the empty string
        - two elements, the second not a ``ParseResults``: that second element
        - otherwise: a copy of the sub-token without its first element,
          unwrapped to its single item when exactly one unnamed item remains

        Raises ``TypeError`` when a sub-token cannot be copied.
        """
        for index, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue

            key = tok[0]
            if isinstance(key, int):
                key = str(key).strip()

            if len(tok) == 1:
                value = ""

            elif len(tok) == 2 and not isinstance(tok[1], ParseResults):
                value = tok[1]

            else:
                try:
                    remainder = tok.copy()  # ParseResults(i)
                except Exception:
                    exc = TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    )
                    raise exc from None

                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, index)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` stands for "skip to whatever is added next"; it is
        # represented by a pending-skip placeholder until ``+`` or ``-`` is applied
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> ParserElement:
        pending = isinstance(self.expr, _PendingSkip)
        if not pending:
            return super().__add__(other)
        # resolve the placeholder now that the following expression is known
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        pending = isinstance(self.expr, _PendingSkip)
        if not pending:
            return super().__sub__(other)
        # resolve the placeholder now that the following expression is known
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Discard every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressed, so return ``self`` unchanged."""
        return self
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator will print
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
    When the parse action completes, the decorator will print
    ``"<<"`` followed by the returned value, or any exception that the parse action raised.

    Output is written to ``sys.stderr``.

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (instring, loc, tokens)
        instring, loc, tokens = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is the bound object; prefix its type name
            label = f"{type(paArgs[0]).__name__}.{label}"

        emit = sys.stderr.write
        emit(f">>entering {label}(line: {line(loc, instring)!r}, {loc}, {tokens!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            emit(f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n")
            raise
        else:
            emit(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z
# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# --- private mini-grammar used by srange() to parse "[...]" expressions ---

# backslash followed by one escapable punctuation character; yields that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# "\x##" / "\X##" (optionally "\0x##"); yields the character with that hex code.
# The digits are taken as everything after the first 'x'/'X'.  str.lstrip must
# not be used here: it removes a *set* of characters, so it would also eat the
# leading zeros of the value ("\x00" -> "") and would not remove an upper-case
# 'X', making int() fail for input the regex accepts.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# "\0" followed by octal digits; yields the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one character of a bracket body: an escape, or a char other than '\' and ']'
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as a (first, last) pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# whole bracket expression: '[' optional '^' body ']'
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    Any exception raised while parsing or expanding ``s`` results in an
    empty string being returned.
    """

    def _expanded(p):
        # a plain character stands for itself
        if not isinstance(p, ParseResults):
            yield p
            return
        # a grouped pair is an inclusive range of code points
        for code_point in range(ord(p[0]), ord(p[1]) + 1):
            yield chr(code_point)

    try:
        pieces = []
        for part in _reBracketExpr.parse_string(s).body:
            pieces.extend(_expanded(part))
        return "".join(pieces)
    except Exception:
        return ""
def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action takes its ``__name__`` from ``func`` (or from
    ``func``'s class when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # label the action after the mapped callable so traces stay readable
    try:
        action_name = func.__name__
    except AttributeError:
        action_name = func.__class__.__name__
    pa.__name__ = action_name

    return pa
def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable of the calling frame that is a
    :class:`ParserElement` without a custom name is given the
    variable's own name via ``set_name``.
    """

    # guard against _getframe not being implemented in the current Python
    frame_getter = getattr(sys, "_getframe", None)
    if frame_getter is None:
        return
    calling_frame = frame_getter(1)
    if calling_frame is None:
        return

    # find all locals in the calling frame that are ParserElements
    caller_locals = typing.cast(types.FrameType, calling_frame).f_locals
    for name, var in caller_locals.items():
        if not isinstance(var, ParserElement):
            continue
        # leave explicitly named elements alone
        if var.customName:
            continue
        # if no custom name defined, set the name to the var name
        var.set_name(name)
# Pre-built expressions for quoted strings.  Each one is wrapped in Combine so
# the opening text and the closing quote come back as a single token.
#
# Body pattern shared by the single-line forms below: after the opening quote,
# zero or more of
#   - any char other than the quote, CR, LF or backslash
#   - a doubled quote ("" or '')
#   - a backslash escape: '\' + any non-'x' char, or '\x' + hex digits
# The closing quote is matched by the literal that follows the Regex.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either of the two forms above; the double-quoted alternative is listed first
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style string literals: triple-quoted forms plus single-line forms.
# Differences from quoted_string visible here: the alternatives are joined
# with ^ instead of |, and the single-line bodies accept a backslash-escaped
# quote (\" or \') where quoted_string accepts a doubled quote.
# NOTE(review): re.MULTILINE only changes how ^ and $ match, and neither
# appears in these patterns - confirm whether re.DOTALL was intended so that
# `\\.` could also match a backslash followed by a newline.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted string with a leading "u"; a copy of quoted_string is used so the
# shared instance is not modified
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets expanded by srange from hex code-point ranges:
# 0xC0-0xD6, 0xD8-0xF6, 0xF8-0xFF
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# 0xA1-0xBF plus 0xD7 and 0xF7 (the two code points left out above)
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement bound at module level up to this point;
# names assigned below this statement are not re-scanned)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# Compatibility synonyms
# Legacy camelCase names.  The expression aliases are plain rebindings of the
# same objects; the function aliases are built by calling replaced_by_pep8
# with the legacy name and the snake_case function.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on