Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 45%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# core.py
3#
4from __future__ import annotations
6import collections.abc
7from collections import deque
8import os
9import typing
10from typing import (
11 Any,
12 Callable,
13 Generator,
14 NamedTuple,
15 Sequence,
16 TextIO,
17 Union,
18 cast,
19)
20from abc import ABC, abstractmethod
21from enum import Enum
22import string
23import copy
24import warnings
25import re
26import sys
27from collections.abc import Iterable
28import traceback
29import types
30from operator import itemgetter
31from functools import wraps
32from threading import RLock
33from pathlib import Path
35from .util import (
36 _FifoCache,
37 _UnboundedCache,
38 __config_flags,
39 _collapse_string_to_ranges,
40 _escape_regex_range_chars,
41 _flatten,
42 LRUMemo as _LRUMemo,
43 UnboundedMemo as _UnboundedMemo,
44 replaced_by_pep8,
45)
46from .exceptions import *
47from .actions import *
48from .results import ParseResults, _ParseResultsWithOffset
49from .unicode import pyparsing_unicode
# alias for sys.maxsize
_MAX_INT = sys.maxsize
# the string-like types: text (str) and binary (bytes)
str_type: tuple[type, ...] = (str, bytes)
54#
55# Copyright (c) 2003-2022 Paul T. McGuire
56#
57# Permission is hereby granted, free of charge, to any person obtaining
58# a copy of this software and associated documentation files (the
59# "Software"), to deal in the Software without restriction, including
60# without limitation the rights to use, copy, modify, merge, publish,
61# distribute, sublicense, and/or sell copies of the Software, and to
62# permit persons to whom the Software is furnished to do so, subject to
63# the following conditions:
64#
65# The above copyright notice and this permission notice shall be
66# included in all copies or substantial portions of the Software.
67#
68# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
69# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
70# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
71# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
72# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
73# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
74# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
75#
77from functools import cached_property
class __compat__(__config_flags):
    """
    Compatibility switches for pyparsing features scheduled for a future release.

    Setting a value in this configuration to True enables the corresponding
    feature in the current version, so that code can be developed and tested
    against it ahead of time.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public (non-underscore) name defined in the class body so far is a flag
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]
class __diag__(__config_flags):
    # global diagnostic flags; all of them start out disabled
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # public names defined above, then split by their naming prefix
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag listed in ``_warning_names``."""
        for warning_name in cls._warning_names:
            cls.enable(warning_name)
class Diagnostics(Enum):
    """
    Enumeration of the diagnostic flags (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - presumably enables warnings about ``'<<'``
      being combined with a :class:`MatchFirst` expression (TODO confirm exact trigger)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Individual diagnostics are switched on and off with :class:`enable_diag` and
    :class:`disable_diag`; :class:`enable_all_warnings` switches on all warnings.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on the global pyparsing diagnostic flag named by ``diag_enum``
    (see :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off the global pyparsing diagnostic flag named by ``diag_enum``
    (see :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning (see :class:`Diagnostics`).

    Delegates to ``__diag__.enable_all_warnings()``.
    """
    __diag__.enable_all_warnings()
# remove the abstract flags base class from this module's namespace
del __config_flags
def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether pyparsing's warnings should be switched on.

    The starting answer is the truthiness of ``warn_env_var``.  Each entry of
    ``cmd_line_warn_options`` is then read as a colon-separated
    ``action:message:category:module:line`` spec, and later entries override
    earlier ones:

    - an action that does not start with "i" enables warnings when the spec
      has no message/category/module at all, or when its module is "pyparsing"
    - an action that starts with "i" disables warnings when its module is
      "pyparsing" or empty

    Returns the final enabled/disabled decision.
    """
    enable = bool(warn_env_var)
    for warn_opt in cmd_line_warn_options:
        # pad with extra colons so that short specs still yield five fields
        fields = (warn_opt + "::::").split(":")[:5]
        w_action, w_message, w_category, w_module, _ = fields
        is_ignore_action = w_action.lower().startswith("i")
        if is_ignore_action:
            if w_module in ("pyparsing", ""):
                enable = False
        elif w_module == "pyparsing" or not (w_message or w_category or w_module):
            enable = True
    return enable
# at import time, enable all warnings if requested through the interpreter's
# warning options or the PYPARSINGENABLEALLWARNINGS environment variable
if _should_enable_warnings(
    cmd_line_warn_options=sys.warnoptions,
    warn_env_var=os.environ.get("PYPARSINGENABLEALLWARNINGS"),
):
    enable_all_warnings()
# builtins that take a single argument and can be used directly as parse
# actions (they are called with just the parsed tokens)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed,
    list, tuple, set,
    any, all, min, max,
}
# fmt: on
_generatorType = types.GeneratorType

# return shape of parseImpl: a pair of (int, Any)
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition callable may take from zero up to three arguments
# (str, int, ParseResults) and returns a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]

# signatures of the fail action and of the three debug actions
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]
# commonly used character sets, exposed as plain strings
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join(c for c in string.printable if c not in string.whitespace)
class _ParseActionIndexError(Exception):
    """
    Internal wrapper for an IndexError raised inside a parse action, so that
    it is not mistaken for an IndexError raised inside a ParserElement
    parseImpl method.
    """

    def __init__(self, msg: str, exc: BaseException) -> None:
        # keep both the descriptive message and the wrapped exception
        self.msg: str = msg
        self.exc: BaseException = exc
# cached by _trim_arity on its first call: the stack entry of the reference
# line inside _trim_arity, and the synthesized (filename, lineno) of the line
# in wrapper() that calls the user's parse action
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """
    Decorator that adapts ``func`` so it can be called with the full argument
    list even if it accepts fewer arguments.

    The returned wrapper first calls ``func`` with all arguments; whenever that
    call itself raises TypeError, one more leading argument is dropped (up to
    ``max_limit`` times) and the call is retried.  Once a call succeeds the
    number of dropped arguments is remembered and used for all later calls.

    TypeErrors raised from deeper inside ``func`` are re-raised, and
    IndexErrors raised by ``func`` are wrapped in _ParseActionIndexError.
    Builtins listed in ``_single_arg_builtins`` are simply called with the
    last (tokens) argument.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise

                # the error is an arity mismatch only if the innermost of the
                # two outermost traceback entries is the call line above
                innermost = traceback.extract_tb(te.__traceback__, limit=2)[-1]
                raised_at_call_site = innermost[:2] == pa_call_line_synth

                if raised_at_call_site and limit < max_limit:
                    limit += 1
                    continue

                raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    fallback_name = getattr(func, "__class__").__name__
    wrapper.__name__ = getattr(func, "__name__", fallback_name)
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a simple predicate function (returning ``True`` or ``False``) so that
    it can be used as a parse action. Useful where a parse action is required
    and :class:`ParserElement.add_condition` cannot be used (such as when adding
    a condition to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - define a custom message to be used in the raised exception
    - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise will raise :class:`ParseException`

    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message

    if fatal:
        exc_type = ParseFatalException
    else:
        exc_type = ParseException

    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        # a falsy result from the predicate fails the parse at this location
        passed = bool(fn(s, l, t))
        if not passed:
            raise exc_type(s, l, msg)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default debug action run when a match attempt starts: prints the
    expression and location, the text line at that location, and a caret
    positioned at the column.  A leading "*" marks a cache hit.
    """
    cache_hit_str = "*" if cache_hit else ""
    report = (
        f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
        f" {line(loc, instring)}\n"
        f" {'^':>{col(loc, instring)}}"
    )
    print(report)
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default debug action run after a successful match: prints the expression
    and the matched tokens as a list.  A leading "*" marks a cache hit.
    """
    cache_hit_str = "*" if cache_hit else ""
    report = f"{cache_hit_str}Matched {expr} -> {toks.as_list()}"
    print(report)
def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default debug action run after a failed match: prints the expression
    together with the type and text of the raised exception.  A leading "*"
    marks a cache hit.
    """
    cache_hit_str = "*" if cache_hit else ""
    report = f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}"
    print(report)
def null_debug_action(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
class ParserElement(ABC):
    """Abstract base level parser element class."""

    # whitespace characters that new expressions skip by default
    DEFAULT_WHITE_CHARS: str = " \n\t\r"
    verbose_stacktrace: bool = False
    # class used for string literals in expressions; replaced through
    # inline_literals_using()
    _literalStringClass: type = None  # type: ignore[assignment]
396 @staticmethod
397 def set_default_whitespace_chars(chars: str) -> None:
398 r"""
399 Overrides the default whitespace chars
401 Example::
403 # default whitespace chars are space, <TAB> and newline
404 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
406 # change to just treat newline as significant
407 ParserElement.set_default_whitespace_chars(" \t")
408 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def']
409 """
410 ParserElement.DEFAULT_WHITE_CHARS = chars
412 # update whitespace all parse expressions defined in this module
413 for expr in _builtin_exprs:
414 if expr.copyDefaultWhiteChars:
415 expr.whiteChars = set(chars)
417 @staticmethod
418 def inline_literals_using(cls: type) -> None:
419 """
420 Set class to be used for inclusion of string literals into a parser.
422 Example::
424 # default literal class used is Literal
425 integer = Word(nums)
426 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
428 date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31']
431 # change to Suppress
432 ParserElement.inline_literals_using(Suppress)
433 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
435 date_str.parse_string("1999/12/31") # -> ['1999', '12', '31']
436 """
437 ParserElement._literalStringClass = cls
439 @classmethod
440 def using_each(cls, seq, **class_kwargs):
441 """
442 Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq.
444 Example::
446 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")
448 .. versionadded:: 3.1.0
449 """
450 yield from (cls(obj, **class_kwargs) for obj in seq)
452 class DebugActions(NamedTuple):
453 debug_try: typing.Optional[DebugStartAction]
454 debug_match: typing.Optional[DebugSuccessAction]
455 debug_fail: typing.Optional[DebugExceptionAction]
457 def __init__(self, savelist: bool = False) -> None:
458 self.parseAction: list[ParseAction] = list()
459 self.failAction: typing.Optional[ParseFailAction] = None
460 self.customName: str = None # type: ignore[assignment]
461 self._defaultName: typing.Optional[str] = None
462 self.resultsName: str = None # type: ignore[assignment]
463 self.saveAsList = savelist
464 self.skipWhitespace = True
465 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
466 self.copyDefaultWhiteChars = True
467 # used when checking for left-recursion
468 self._may_return_empty = False
469 self.keepTabs = False
470 self.ignoreExprs: list[ParserElement] = list()
471 self.debug = False
472 self.streamlined = False
473 # optimize exception handling for subclasses that don't advance parse index
474 self.mayIndexError = True
475 self.errmsg: Union[str, None] = ""
476 # mark results names as modal (report only last) or cumulative (list all)
477 self.modalResults = True
478 # custom debug actions
479 self.debugActions = self.DebugActions(None, None, None)
480 # avoid redundant calls to preParse
481 self.callPreparse = True
482 self.callDuringTry = False
483 self.suppress_warnings_: list[Diagnostics] = []
484 self.show_in_diagram = True
486 @property
487 def mayReturnEmpty(self):
488 return self._may_return_empty
490 @mayReturnEmpty.setter
491 def mayReturnEmpty(self, value):
492 self._may_return_empty = value
494 def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
495 """
496 Suppress warnings emitted for a particular diagnostic on this expression.
498 Example::
500 base = pp.Forward()
501 base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)
503 # statement would normally raise a warning, but is now suppressed
504 print(base.parse_string("x"))
506 """
507 self.suppress_warnings_.append(warning_type)
508 return self
510 def visit_all(self):
511 """General-purpose method to yield all expressions and sub-expressions
512 in a grammar. Typically just for internal use.
513 """
514 to_visit = deque([self])
515 seen = set()
516 while to_visit:
517 cur = to_visit.popleft()
519 # guard against looping forever through recursive grammars
520 if cur in seen:
521 continue
522 seen.add(cur)
524 to_visit.extend(cur.recurse())
525 yield cur
527 def copy(self) -> ParserElement:
528 """
529 Make a copy of this :class:`ParserElement`. Useful for defining
530 different parse actions for the same parsing pattern, using copies of
531 the original parse element.
533 Example::
535 integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
536 integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
537 integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
539 print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))
541 prints::
543 [5120, 100, 655360, 268435456]
545 Equivalent form of ``expr.copy()`` is just ``expr()``::
547 integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
548 """
549 cpy = copy.copy(self)
550 cpy.parseAction = self.parseAction[:]
551 cpy.ignoreExprs = self.ignoreExprs[:]
552 if self.copyDefaultWhiteChars:
553 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
554 return cpy
556 def set_results_name(
557 self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
558 ) -> ParserElement:
559 """
560 Define name for referencing matching tokens as a nested attribute
561 of the returned parse results.
563 Normally, results names are assigned as you would assign keys in a dict:
564 any existing value is overwritten by later values. If it is necessary to
565 keep all values captured for a particular results name, call ``set_results_name``
566 with ``list_all_matches`` = True.
568 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
569 this is so that the client can define a basic element, such as an
570 integer, and reference it in multiple places with different names.
572 You can also set results names using the abbreviated syntax,
573 ``expr("name")`` in place of ``expr.set_results_name("name")``
574 - see :class:`__call__`. If ``list_all_matches`` is required, use
575 ``expr("name*")``.
577 Example::
579 integer = Word(nums)
580 date_str = (integer.set_results_name("year") + '/'
581 + integer.set_results_name("month") + '/'
582 + integer.set_results_name("day"))
584 # equivalent form:
585 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
586 """
587 listAllMatches = listAllMatches or list_all_matches
588 return self._setResultsName(name, listAllMatches)
590 def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
591 if name is None:
592 return self
593 newself = self.copy()
594 if name.endswith("*"):
595 name = name[:-1]
596 list_all_matches = True
597 newself.resultsName = name
598 newself.modalResults = not list_all_matches
599 return newself
601 def set_break(self, break_flag: bool = True) -> ParserElement:
602 """
603 Method to invoke the Python pdb debugger when this element is
604 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
605 disable.
606 """
607 if break_flag:
608 _parseMethod = self._parse
610 def breaker(instring, loc, do_actions=True, callPreParse=True):
611 # this call to breakpoint() is intentional, not a checkin error
612 breakpoint()
613 return _parseMethod(instring, loc, do_actions, callPreParse)
615 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined]
616 self._parse = breaker # type: ignore [method-assign]
617 elif hasattr(self._parse, "_originalParseMethod"):
618 self._parse = self._parse._originalParseMethod # type: ignore [method-assign]
619 return self
621 def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
622 """
623 Define one or more actions to perform when successfully matching parse element definition.
625 Parse actions can be called to perform data conversions, do extra validation,
626 update external data structures, or enhance or replace the parsed tokens.
627 Each parse action ``fn`` is a callable method with 0-3 arguments, called as
628 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
630 - ``s`` = the original string being parsed (see note below)
631 - ``loc`` = the location of the matching substring
632 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object
634 The parsed tokens are passed to the parse action as ParseResults. They can be
635 modified in place using list-style append, extend, and pop operations to update
636 the parsed list elements; and with dictionary-style item set and del operations
637 to add, update, or remove any named results. If the tokens are modified in place,
638 it is not necessary to return them with a return statement.
640 Parse actions can also completely replace the given tokens, with another ``ParseResults``
641 object, or with some entirely different object (common for parse actions that perform data
642 conversions). A convenient way to build a new parse result is to define the values
643 using a dict, and then create the return value using :class:`ParseResults.from_dict`.
645 If None is passed as the ``fn`` parse action, all previously added parse actions for this
646 expression are cleared.
648 Optional keyword arguments:
650 - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during
651 lookaheads and alternate testing. For parse actions that have side effects, it is
652 important to only call the parse action once it is determined that it is being
653 called as part of a successful parse. For parse actions that perform additional
654 validation, then call_during_try should be passed as True, so that the validation
655 code is included in the preliminary "try" parses.
657 Note: the default parsing behavior is to expand tabs in the input string
658 before starting the parsing process. See :class:`parse_string` for more
659 information on parsing strings containing ``<TAB>`` s, and suggested
660 methods to maintain a consistent view of the parsed string, the parse
661 location, and line and column positions within the parsed string.
663 Example::
665 # parse dates in the form YYYY/MM/DD
667 # use parse action to convert toks from str to int at parse time
668 def convert_to_int(toks):
669 return int(toks[0])
671 # use a parse action to verify that the date is a valid date
672 def is_valid_date(instring, loc, toks):
673 from datetime import date
674 year, month, day = toks[::2]
675 try:
676 date(year, month, day)
677 except ValueError:
678 raise ParseException(instring, loc, "invalid date given")
680 integer = Word(nums)
681 date_str = integer + '/' + integer + '/' + integer
683 # add parse actions
684 integer.set_parse_action(convert_to_int)
685 date_str.set_parse_action(is_valid_date)
687 # note that integer fields are now ints, not strings
688 date_str.run_tests('''
689 # successful parse - note that integer fields were converted to ints
690 1999/12/31
692 # fail - invalid date
693 1999/13/31
694 ''')
695 """
696 if list(fns) == [None]:
697 self.parseAction.clear()
698 return self
700 if not all(callable(fn) for fn in fns):
701 raise TypeError("parse actions must be callable")
702 self.parseAction[:] = [_trim_arity(fn) for fn in fns]
703 self.callDuringTry = kwargs.get(
704 "call_during_try", kwargs.get("callDuringTry", False)
705 )
707 return self
709 def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
710 """
711 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.
713 See examples in :class:`copy`.
714 """
715 self.parseAction += [_trim_arity(fn) for fn in fns]
716 self.callDuringTry = self.callDuringTry or kwargs.get(
717 "call_during_try", kwargs.get("callDuringTry", False)
718 )
719 return self
721 def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement:
722 """Add a boolean predicate function to expression's list of parse actions. See
723 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
724 functions passed to ``add_condition`` need to return boolean success/fail of the condition.
726 Optional keyword arguments:
728 - ``message`` = define a custom message to be used in the raised exception
729 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
730 ParseException
731 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
732 default=False
734 Example::
736 integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
737 year_int = integer.copy()
738 year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
739 date_str = year_int + '/' + integer + '/' + integer
741 result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0),
742 (line:1, col:1)
743 """
744 for fn in fns:
745 self.parseAction.append(
746 condition_as_parse_action(
747 fn,
748 message=str(kwargs.get("message")),
749 fatal=bool(kwargs.get("fatal", False)),
750 )
751 )
753 self.callDuringTry = self.callDuringTry or kwargs.get(
754 "call_during_try", kwargs.get("callDuringTry", False)
755 )
756 return self
758 def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
759 """
760 Define action to perform if parsing fails at this expression.
761 Fail acton fn is a callable function that takes the arguments
762 ``fn(s, loc, expr, err)`` where:
764 - ``s`` = string being parsed
765 - ``loc`` = location where expression match was attempted and failed
766 - ``expr`` = the parse expression that failed
767 - ``err`` = the exception thrown
769 The function returns no value. It may throw :class:`ParseFatalException`
770 if it is desired to stop parsing immediately."""
771 self.failAction = fn
772 return self
774 def _skipIgnorables(self, instring: str, loc: int) -> int:
775 if not self.ignoreExprs:
776 return loc
777 exprsFound = True
778 ignore_expr_fns = [e._parse for e in self.ignoreExprs]
779 last_loc = loc
780 while exprsFound:
781 exprsFound = False
782 for ignore_fn in ignore_expr_fns:
783 try:
784 while 1:
785 loc, dummy = ignore_fn(instring, loc)
786 exprsFound = True
787 except ParseException:
788 pass
789 # check if all ignore exprs matched but didn't actually advance the parse location
790 if loc == last_loc:
791 break
792 last_loc = loc
793 return loc
795 def preParse(self, instring: str, loc: int) -> int:
796 if self.ignoreExprs:
797 loc = self._skipIgnorables(instring, loc)
799 if self.skipWhitespace:
800 instrlen = len(instring)
801 white_chars = self.whiteChars
802 while loc < instrlen and instring[loc] in white_chars:
803 loc += 1
805 return loc
807 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
808 return loc, []
810 def postParse(self, instring, loc, tokenlist):
811 return tokenlist
813 # @profile
814 def _parseNoCache(
815 self, instring, loc, do_actions=True, callPreParse=True
816 ) -> tuple[int, ParseResults]:
817 debugging = self.debug # and do_actions)
818 len_instring = len(instring)
820 if debugging or self.failAction:
821 # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
822 try:
823 if callPreParse and self.callPreparse:
824 pre_loc = self.preParse(instring, loc)
825 else:
826 pre_loc = loc
827 tokens_start = pre_loc
828 if self.debugActions.debug_try:
829 self.debugActions.debug_try(instring, tokens_start, self, False)
830 if self.mayIndexError or pre_loc >= len_instring:
831 try:
832 loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
833 except IndexError:
834 raise ParseException(instring, len_instring, self.errmsg, self)
835 else:
836 loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
837 except Exception as err:
838 # print("Exception raised:", err)
839 if self.debugActions.debug_fail:
840 self.debugActions.debug_fail(
841 instring, tokens_start, self, err, False
842 )
843 if self.failAction:
844 self.failAction(instring, tokens_start, self, err)
845 raise
846 else:
847 if callPreParse and self.callPreparse:
848 pre_loc = self.preParse(instring, loc)
849 else:
850 pre_loc = loc
851 tokens_start = pre_loc
852 if self.mayIndexError or pre_loc >= len_instring:
853 try:
854 loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
855 except IndexError:
856 raise ParseException(instring, len_instring, self.errmsg, self)
857 else:
858 loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
860 tokens = self.postParse(instring, loc, tokens)
862 ret_tokens = ParseResults(
863 tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
864 )
865 if self.parseAction and (do_actions or self.callDuringTry):
866 if debugging:
867 try:
868 for fn in self.parseAction:
869 try:
870 tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type]
871 except IndexError as parse_action_exc:
872 exc = ParseException("exception raised in parse action")
873 raise exc from parse_action_exc
875 if tokens is not None and tokens is not ret_tokens:
876 ret_tokens = ParseResults(
877 tokens,
878 self.resultsName,
879 asList=self.saveAsList
880 and isinstance(tokens, (ParseResults, list)),
881 modal=self.modalResults,
882 )
883 except Exception as err:
884 # print "Exception raised in user parse action:", err
885 if self.debugActions.debug_fail:
886 self.debugActions.debug_fail(
887 instring, tokens_start, self, err, False
888 )
889 raise
890 else:
891 for fn in self.parseAction:
892 try:
893 tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type]
894 except IndexError as parse_action_exc:
895 exc = ParseException("exception raised in parse action")
896 raise exc from parse_action_exc
898 if tokens is not None and tokens is not ret_tokens:
899 ret_tokens = ParseResults(
900 tokens,
901 self.resultsName,
902 asList=self.saveAsList
903 and isinstance(tokens, (ParseResults, list)),
904 modal=self.modalResults,
905 )
906 if debugging:
907 # print("Matched", self, "->", ret_tokens.as_list())
908 if self.debugActions.debug_match:
909 self.debugActions.debug_match(
910 instring, tokens_start, loc, self, ret_tokens, False
911 )
913 return loc, ret_tokens
915 def try_parse(
916 self,
917 instring: str,
918 loc: int,
919 *,
920 raise_fatal: bool = False,
921 do_actions: bool = False,
922 ) -> int:
923 try:
924 return self._parse(instring, loc, do_actions=do_actions)[0]
925 except ParseFatalException:
926 if raise_fatal:
927 raise
928 raise ParseException(instring, loc, self.errmsg, self)
def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression would match ``instring`` at ``loc``.

    A :class:`ParseException` or ``IndexError`` raised by the attempt counts as
    "no match"; any other exception propagates.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True
# Memo table used for left-recursion in Forward references, together with
# the re-entrant lock associated with it.
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = dict()
class _CacheType(typing.Protocol):
    """
    Structural interface for the caches used for packrat and left-recursion
    cacheing of results and exceptions.
    """

    # sentinel that a ``get`` result is compared against to detect a cache miss
    not_in_cache: bool

    def get(self, *args) -> typing.Any:
        """Look up an entry."""

    def set(self, *args) -> None:
        """Store an entry."""

    def clear(self) -> None:
        """Discard every entry."""
class NullCache(dict):
    """
    Do-nothing cache used to initialize the packrat_cache class variable.
    If/when enable_packrat() is called, this placeholder is replaced by a
    proper _CacheType class instance.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        """Ignore the arguments; always returns ``None``."""

    def set(self, *args) -> None:
        """Ignore the arguments; nothing is stored."""

    def clear(self) -> None:
        """No-op override of ``dict.clear``."""
# Class-level packrat state, used to optimize repeated calls when backtracking
# through recursive expressions: the argument cache itself (a NullCache
# placeholder here), the lock guarding it, and the [hit, miss] counters.
packrat_cache: _CacheType = NullCache()
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]
# This method gets called repeatedly during backtracking with the same arguments,
# so the outcome for each distinct argument set is cached, saving the trouble of
# re-parsing the contained expression.
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Memoizing wrapper around ``_parseNoCache``.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``. Both
    successful results and :class:`ParseBaseException` failures are cached; a
    cached failure is re-raised on a later hit. Debug actions, when enabled,
    are invoked on cache hits with ``cache_hit=True``; a ``TypeError`` from an
    action that does not accept that keyword is ignored.

    :returns: ``(end_location, tokens)`` as for ``_parseNoCache``
    :raises ParseBaseException: if the (possibly cached) parse attempt failed
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            # entry layout: (end location, private copy of the tokens, start location)
            cache.set(lookup, (value[0], value[1].copy(), loc))
            return value

        ParserElement.packrat_cache_stats[HIT] += 1
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(value, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise value

        value = cast(tuple[int, ParseResults, int], value)
        # hand out a copy so callers cannot mutate the cached tokens
        end_loc, result, start_loc = value[0], value[1].copy(), value[2]
        if self.debug and self.debugActions.debug_match:
            try:
                # success actions take (start, end); the cached entry stores the
                # end location first, so pass the fields in signature order
                self.debugActions.debug_match(
                    instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return end_loc, result
# default parse entry point: the uncached implementation
_parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters in place, and clear the
    left-recursion memo table, all while holding the packrat cache lock.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        cls.packrat_cache.clear()
        stats = cls.packrat_cache_stats
        # slice-assign so the same list object is kept
        stats[:] = [0 for _ in stats]
        cls.recursion_memos.clear()
# class-level flags recording which memoization mode (if any) is active
_packratEnabled = False
_left_recursion_enabled = False
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing, whichever is active, and
    discard their memoized state.

    Calling this when neither mode is enabled is harmless, so it may be used
    before activating Packrat or Left Recursion to clear any previous settings.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        cls.reset_cache()
        cls._left_recursion_enabled = False
        cls._packratEnabled = False
        # route parsing back through the uncached implementation
        cls._parse = cls._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and indirect
    left-recursion. While parsing, left-recursive :class:`Forward` elements are
    matched repeatedly with a fixed recursion depth that is increased step by
    step until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    Because recursion search memoizes matches of ``Forward`` elements, parse
    actions may not be re-evaluated during backtracking. Programs whose parse
    actions depend on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing is similar to, but not the same as, Packrat parsing,
    so the two cannot be combined. Pass ``force=True`` to disable any previous,
    conflicting settings.

    :raises RuntimeError: if Packrat parsing is enabled and ``force`` is false
    :raises NotImplementedError: if ``cache_size_limit`` is an integer <= 0
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, i.e. memoization inside the parsing logic.
    Parse attempts repeated at the same string location (common in complex
    grammars) return a cached value immediately instead of re-running the
    parsing/validating code. Valid results and parsing exceptions are both
    memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is first
    imported. To activate it, your program must call the class method
    :class:`ParserElement.enable_packrat`. For best results, call
    ``enable_packrat()`` immediately after importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar to, but not the same as, Bounded Recursion parsing,
    so the two cannot be combined. Pass ``force=True`` to disable any previous,
    conflicting settings.

    :raises RuntimeError: if Bounded Recursion is enabled and ``force`` is false
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        # already active: leave the existing cache untouched
        if ParserElement._packratEnabled:
            return

        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        ParserElement._parse = ParserElement._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` according to this parser definition. This is the primary
    entry point intended for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the input string match the entire grammar, set the ``parse_all`` flag to ``True``. This
    is equivalent to ending the grammar with :class:`StringEnd`\\ ().

    So that column numbers are reported properly, ``parse_string`` works on a copy of the input string in which
    all tabs have been converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the
    input string contains tabs and the grammar uses parse actions that index into the parsed string with the
    ``loc`` argument, a consistent view of the input string can be ensured in one of these ways:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    parseAll = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if parseAll:
            # nothing but skippable text may remain after the match
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
    return tokens
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match yields the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    :param instring: the string to scan
    :param max_matches: stop after this many matches
    :param overlap: if true, resume scanning from within the previous match
        rather than from its end
    :param always_skip_whitespace: if true (the default), text is skipped before
        each attempt using this expression's whitespace characters and ignore
        expressions; if false, this expression's own ``preParse`` is used
    :param debug: if true, print a dict of tokens/start/end for each match
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of the
        two values applies

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        # throwaway element used only for its skipping behaviour
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # jump to the end of the match only if there was
                        # skippable text at the current position; otherwise
                        # advance one character so later matches may overlap
                        skipped_to = preparseFn(instring, loc)
                        if skipped_to > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match made no progress past loc; step forward
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text using the
    modified tokens returned from a parse action. To use ``transform_string``,
    define a grammar and attach a parse action to it that modifies the returned
    token list. Calling ``transform_string()`` on a target string scans for
    matches and replaces the matched text according to the logic in the parse
    action; the transformed string is returned.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # carry over the untouched text between the previous match and this one
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces.extend(toks.as_list())
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[prev_end:])
        non_empty = list(filter(None, pieces))
        return "".join(map(str, _flatten(non_empty)))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, collecting just the tokens of
    every match of this parse expression. May be called with optional
    ``max_matches`` argument, to clip searching after 'n' matches are found.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = []
        for toks, _start, _end in self.scan_string(
            instring, limit, always_skip_whitespace=False, debug=debug
        ):
            found.append(toks)
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, treating matches of this expression as the
    separators. The optional ``maxsplit`` argument limits the number of splits,
    and the optional ``include_separators`` argument (default= ``False``)
    controls whether the separating matching text is included in the split
    results. ``includeSeparators`` is the pre-PEP8 spelling of the same flag.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator (possibly the empty string)
    yield instring[cursor:]
def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. A string operand
    is converted to a :class:`Literal`\\ by default before being combined.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is ...:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented
def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    A left-hand ``...`` becomes a :class:`SkipTo` this expression, with the
    skipped text stored under the ``_skipped`` results name.
    """
    if other is ...:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented
def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if not isinstance(rhs, ParserElement):
        return NotImplemented
    error_stop = And._ErrorStop()
    return self + error_stop + rhs
def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented
def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    :raises ValueError: for a negative count, or a maximum below the minimum
    """
    # a leading Ellipsis stands for a minimum of zero
    if other is ...:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (...,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        min_count, opt_count = other, 0
    elif isinstance(other, tuple):
        # treat remaining Ellipsis entries as None and pad/clip to a pair
        bounds = tuple(None if b is ... else b for b in other)
        lower, upper = (bounds + (None, None))[:2]
        if lower is None:
            lower = 0
        if not isinstance(lower, int):
            return NotImplemented
        if upper is None:
            # open-ended repetition
            if lower == 0:
                return ZeroOrMore(self)
            if lower == 1:
                return OneOrMore(self)
            return self * lower + ZeroOrMore(self)
        if not isinstance(upper, int):
            return NotImplemented
        min_count, opt_count = lower, upper - lower
    else:
        return NotImplemented

    if min_count < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if opt_count < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if min_count == opt_count == 0:
        return And([])

    if not opt_count:
        return self if min_count == 1 else And([self] * min_count)

    def optional_chain(depth):
        # Opt(self + Opt(self + ... Opt(self))) nested `depth` levels deep
        nested = Opt(self)
        for _ in range(depth - 1):
            nested = Opt(self + nested)
        return nested

    if not min_count:
        return optional_chain(opt_count)
    required = self if min_count == 1 else And([self] * min_count)
    return required + optional_chain(opt_count)
def __rmul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator when left operand is not a
    :class:`ParserElement`; delegates directly to ``__mul__``.
    """
    product = self.__mul__(other)
    return product
def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is ...:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        if other == "":
            # `expr | ""` is equivalent to `Opt(expr)`
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented
def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented
def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented
def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented
def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented
def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented
def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - wraps this expression in :class:`NotAny`
    """
    negated = NotAny(self)
    return negated
# Setting __iter__ to None disables iteration over an expression, overriding
# the legacy fallback of iterating a sequence via sequential __getitem__ access.
__iter__ = None
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream. If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    .. versionchanged:: 3.1.0
       Support for slice notation.

    :raises TypeError: if more than 2 index arguments are given
    """

    stop_on_defined = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[count: stop_expr]
        stop_on = key.stop
        key = ... if key.start is None else key.start
        stop_on_defined = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max: stop_expr]
        tail = key[1]
        key, stop_on = (key[0], tail.start), tail.stop
        stop_on_defined = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    if len(key) > 2:
        overflow = f"... [{len(key)}]" if len(key) > 5 else ""
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{overflow})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if stop_on_defined:
        repeated.stopOn(stop_on)

    return repeated
def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    A trailing ``'*'`` character on ``name`` causes ``list_all_matches`` to be
    passed as ``True``.

    Calling with no ``name`` is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)
def suppress(self) -> ParserElement:
    """
    Wrap this :class:`ParserElement` in :class:`Suppress` so that its output is
    suppressed; useful to keep punctuation from cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed
def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
        not consulted by this implementation
    :returns: ``self``, to allow call chaining
    """
    self.skipWhitespace = True
    return self
def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern. Normally this is only used
    internally by the pyparsing module, but some whitespace-sensitive grammars
    may need it.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any);
        not consulted by this implementation
    :returns: ``self``, to allow call chaining
    """
    self.skipWhitespace = False
    return self
def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars

    :param chars: the characters to treat as whitespace; stored as a ``set``
    :param copy_defaults: stored as ``copyDefaultWhiteChars``
    :returns: ``self``, to allow call chaining
    """
    self.whiteChars = set(chars)
    self.copyDefaultWhiteChars = copy_defaults
    # giving explicit whitespace chars also switches whitespace skipping on
    self.skipWhitespace = True
    return self
def parse_with_tabs(self) -> ParserElement:
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before
    parsing the input string, so that tabs are kept. Must be called before
    ``parse_string`` when the input grammar contains elements that match
    ``<TAB>`` characters.

    :returns: ``self``, to allow call chaining
    """
    self.keepTabs = True
    return self
def ignore(self, other: ParserElement) -> ParserElement:
    """
    Register an expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    Example::

        patt = Word(alphas)[...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']

    :returns: ``self``, to allow call chaining
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        # anything else is copied and wrapped so that its tokens are dropped
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching, and
    turn debugging on for this expression. A falsy action is replaced by the
    corresponding default debug action.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

    :returns: ``self``, to allow call chaining
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_failure = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_failure)
    self.debug = True
    return self
def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Before each attempt to match the ``wd`` expression,
    the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"`` is shown. Then a
    ``"Matched"`` message is shown if the parse succeeds, or an ``"Exception raised"`` message
    otherwise. Also note the use of :class:`set_name` to assign a human-readable name to the
    expression, which makes debugging and exception messages easier to understand - for instance,
    the default name created for the :class:`Word` expression without calling ``set_name`` is
    ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.

    :returns: ``self``, to allow call chaining
    """
    if not recurse:
        if flag:
            self.set_debug_actions(
                _default_start_debug_action,
                _default_success_debug_action,
                _default_exception_debug_action,
            )
        else:
            self.debug = False
        return self

    # apply the flag individually to every expression reported by visit_all()
    for sub_expr in self.visit_all():
        sub_expr.set_debug(flag, recurse=False)
    return self
@property
def default_name(self) -> str:
    """Auto-generated name for this expression, built on first access and then cached."""
    cached = self._defaultName
    if cached is None:
        # First access (or cache was reset): generate and remember the name.
        cached = self._defaultName = self._generateDefaultName()
    return cached
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Return the auto-generated name for this expression.

    Child classes must define this method; its result is what the
    ``default_name`` property caches and returns.
    """
def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a custom name to this expression, to make debugging and exception
    messages clearer. If `__diag__.enable_debug_on_named_expressions` is set to True,
    setting a name will also enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Returns ``self``.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # Rebuild the error message now that str(self) reflects the new name.
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        has_custom_name = name is not None
        self.set_debug(has_custom_name)

    return self
@property
def name(self) -> str:
    """Display name: the custom name if one was set, otherwise the auto-generated default."""
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

@name.setter
def name(self, new_name) -> None:
    # Assignment goes through set_name() so its side effects are applied too.
    self.set_name(new_name)
def __str__(self) -> str:
    """Return this expression's ``name`` (custom name if set, else the default name)."""
    return self.name
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)
def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default name,
    so the name is regenerated on next access. Returns ``self``.
    """
    self._defaultName = None
    self.streamlined = True
    return self
def recurse(self) -> list[ParserElement]:
    """Return the sub-expressions to descend into; this base implementation has none."""
    return []
def _checkRecursion(self, parseElementList):
    """
    Run ``_checkRecursion`` on every expression returned by ``recurse()``,
    passing each a new list made of ``parseElementList`` plus this expression.
    The caller's list is not modified.
    """
    path_with_self = [*parseElementList, self]
    for sub_expr in self.recurse():
        sub_expr._checkRecursion(path_with_self)
def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
        Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Emits a ``DeprecationWarning`` and then runs the recursion check starting
    from an empty path. ``validateTrace`` is accepted but not used.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        category=DeprecationWarning,
        stacklevel=2,
    )
    self._checkRecursion([])
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Parameters:

    - ``file_or_filename`` - an object with a ``read()`` method, or a path
      (``str`` or ``Path``) to open
    - ``encoding`` - encoding used when a path is opened (default= ``"utf-8"``);
      ignored for file objects
    - ``parse_all`` / ``parseAll`` - passed on to ``parse_string``; enabled if
      either is true

    Returns the result of ``parse_string`` on the file contents.

    Raises ``ParseBaseException`` on a parse failure; unless
    ``ParserElement.verbose_stacktrace`` is set, the exception is re-raised
    with its traceback cleared.
    """
    parseAll = parseAll or parse_all

    # Decide "file object vs. filename" by looking for a read attribute, rather
    # than by catching AttributeError around the read() call itself: an
    # AttributeError raised *inside* a file object's read() must propagate, not
    # be mistaken for "this is a filename".
    reader = getattr(file_or_filename, "read", None)
    if reader is not None:
        file_contents = reader()
    else:
        file_or_filename = typing.cast(str, file_or_filename)
        with open(file_or_filename, "r", encoding=encoding) as f:
            file_contents = f.read()

    try:
        return self.parse_string(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object is always equal
    - a string is equal if this expression matches it in full
    - another ``ParserElement`` is equal if both have equal instance attributes
    - anything else is not equal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)
def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick yes/no test of this parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if it raises a
    ``ParseBaseException``.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # Both spellings default to True, so either one set to False turns it off.
    parseAll = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are alternate spellings of the arguments above and
    are merged with them at the start of the method.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of
    (test string, parse results or exception) pairs, one per test that was run

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # Merge the snake_case arguments with their camelCase synonyms. Flags that
    # default to True are and-ed (either spelling can switch them off); flags
    # and callbacks that default to False/None are or-ed (either can switch them on).
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # A single multiline string: drop trailing whitespace, split into
        # lines, and strip each line.
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    # comment lines collected since the last test; emitted ahead of the next test
    comments: list[str] = []
    success = True
    # expression that rewrites a literal backslash-n in a test string into a real
    # newline; quoted strings are registered as ignorable for this expression
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # A line is treated as commentary if it matches the comment expression,
        # or if it is blank and follows at least one pending comment line.
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        # other blank lines are skipped
        if not t:
            continue
        # Output for this test starts with a newline plus any pending comments,
        # followed by the test string itself.
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            # Parse failure: counts as success only when failures are expected.
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            # Any other exception is reported separately from parse failures.
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                # the frame right after the one at pa_call_line_synth is named
                # in the tag as the parse action that raised
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            # Parse succeeded: counts as success only when failures are not expected.
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # NOTE(review): unlike the other dump() calls below, this
                        # one does not pass full=fullDump - confirm whether that
                        # is intentional.
                        out.append(result.dump())
                except Exception as e:
                    # A failing post_parse callback does not fail the test; the
                    # normal dump is emitted along with a note about the failure.
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        # trailing empty entry so the joined output ends with a newline
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` (chained from the ``ImportError``) if the diagram
    support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    # Render the HTML once, before the output target is opened: if rendering
    # fails, an existing output file is left untouched instead of having
    # already been truncated by open(..., "w").
    html = railroad_to_html(railroad, embed=embed, **kwargs)

    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(html)
        return

    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(html)
# Compatibility synonyms
# Each pre-PEP8 camelCase name below is bound to the object returned by
# ``replaced_by_pep8(<camelCase name>, <snake_case method>)``. The first group
# is additionally wrapped in ``staticmethod``; the second group is bound as
# ordinary methods. ``defaultName`` is a direct alias of the ``default_name``
# property.
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
defaultName = default_name
# fmt: on
class _PendingSkip(ParserElement):
    """
    Internal placeholder that holds the spot where ``...`` was added to a parser
    element. Once another ParserElement is added to it, the placeholder is
    replaced with an expression built around a SkipTo.
    """

    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        # expression that appeared before the '...'
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # Render "<anchor> + Empty" and show the Empty part as "..."
        rendered = str(self.anchor + Empty())
        return rendered.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        """Replace this placeholder with ``anchor + SkipTo(other) + other``."""
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched: drop the skipped entry if nothing was skipped
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match: record that it is missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # A placeholder that was never completed cannot be parsed.
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self) -> None:
        # Tokens are constructed with savelist disabled.
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # Default name is the concrete class name; subclasses may override.
        return type(self).__name__
class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Unconditionally fail at the current location.
        raise ParseException(instring, loc, self.errmsg, self)
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Subclasses are always created as themselves.
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        match_string = matchString or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        super().__init__()
        literal = matchString or match_string
        self.match = literal
        self.matchLen = len(literal)
        # first character (or "" for an empty literal), used as a quick pre-check
        self.firstMatchChar = literal[:1]
        self._may_return_empty = False
        self.mayIndexError = False
        self.errmsg = f"Expected {self.name}"

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Compare the first character before doing the full startswith check.
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # The arguments are accepted for signature compatibility with Literal
        # but are not used; the literal is always the empty string.
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Succeeds without advancing and without producing tokens.
        return loc, []
class _SingleCharLiteral(Literal):
    """Literal specialization for one-character strings: a single comparison suffices."""

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Set Literal as ParserElement's ``_literalStringClass`` now that Literal is defined.
# NOTE(review): presumably the class used to wrap plain strings given where an
# expression is expected - confirm against the users of ``_literalStringClass``.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.

    :raises ValueError: If the keyword string is empty
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparison is done in upper case, for both the keyword
            # itself and the identifier characters around it.
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that the characters immediately
        before and after it (if any) are not identifier characters.

        Returns ``(new_loc, keyword)`` on success. Raises ``ParseException``
        otherwise; when the text matched but a neighboring character is an
        identifier character, the message says so and the reported location
        points at that character.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    # (message now matches the case-sensitive branch below)
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        defining_literal = matchString or match_string
        # The base class stores the upper-cased form, which is what input is
        # compared against.
        super().__init__(defining_literal.upper())
        # Preserve the defining literal.
        self.returnString = defining_literal
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        # Resolve the alternate spellings, then delegate with caseless forced on.
        chosen_ident_chars = identChars or ident_chars
        chosen_match = matchString or match_string
        super().__init__(chosen_match, chosen_ident_chars, caseless=True)
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        # An explicit max_mismatches wins over the maxMismatches spelling.
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        start = loc
        target = self.match_string
        end = start + len(target)

        # Only attempt a comparison if enough input remains.
        if end <= len(instring):
            mismatches = []
            limit = self.maxMismatches
            pos = 0

            for pos, (src, mat) in enumerate(zip(instring[loc:end], target)):
                if self.caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(pos)
                    if len(mismatches) > limit:
                        # too many mismatches: give up and fall through to the raise
                        break
            else:
                # Loop finished without exceeding the limit: report the match.
                loc = start + pos + 1
                results = ParseResults([instring[start:loc]])
                results["original"] = target
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums)  # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
        Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # camelCase keyword arguments take precedence over their snake_case
        # counterparts when both are given
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no (remaining) body characters: body uses the same set as the lead
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    # the leading fragment consumes one character, so the body
                    # repetition counts are one less than the word lengths
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def copy(self) -> Word:
        """Return a copy of this expression.

        The instance-level ``parseImpl`` override is a bound method and would
        otherwise stay bound to the original object, so it is re-bound to the
        copy.  This is only done when a regex matcher was actually built;
        Words that fell back to the character-scanning ``parseImpl`` (a space
        in the character sets, or a regex that failed to compile) have no
        ``re_match`` and must keep using the class-level implementation.
        """
        ret: Word = cast(Word, super().copy())
        if getattr(self, "re_match", None) is not None:
            ret.re_match = self.re_match
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        """Build a compact default name from the character sets and length bounds."""

        def charsAsStr(s):
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single character: drop the leading "W:" prefix
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Character-scanning matcher, used when no regex could be built.

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` when the
        first character is not an initial character, the run is shorter than
        ``minLen``, an explicit maximum is followed by another body character,
        or (for keywords) the match is adjacent to a body character.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed by ``__init__`` when a pattern was compiled."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()
class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # the camelCase spellings win over the snake_case ones when both are given
        keyword_flag = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset,
            exact=1,
            as_keyword=keyword_flag,
            exclude_chars=excluded,
        )
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ documentation for an
    explanation of the acceptable patterns and flags.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ) -> None:
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first read
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # already-compiled pattern object (stdlib ``re`` or compatible)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self):
        """Return a copy, re-binding any instance-level ``parseImpl`` override to the copy."""
        ret: Regex = cast(Regex, super().copy())
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """The compiled pattern, built on first access.

        Raises ``ValueError`` if the pattern string cannot be compiled.
        """
        if self._re:
            return self._re

        if callable(self.pattern):
            # replace self.pattern with the string returned by calling self.pattern()
            self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, we're done
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
                return self._re

            # keep reString in step with the resolved pattern string
            self.reString = self.pattern

        try:
            self._re = re.compile(self.pattern, self.flags)
        except re.error as err:
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err
        else:
            # use the freshly compiled object directly instead of re-entering
            # this property while it is still being computed
            self._may_return_empty = self._re.match("", pos=0) is not None
            return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether the pattern can match the empty string.

        Computed lazily; an explicit value assigned through the setter is
        returned as-is.
        """
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            compiled = self.re
            if self._may_return_empty is None:
                # pre-compiled patterns (passed in directly or returned by a
                # callable) bypass the compile step that sets the flag, so
                # probe them here instead of leaving the flag as None
                self._may_return_empty = compiled.match("") is not None
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        """Build the default name from the pattern's repr, collapsing doubled backslashes."""
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc``; return ``(new_loc, ParseResults)`` with named groups as named results."""
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Variant used with ``as_group_list=True``: returns the match's ``groups()`` tuple."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Variant used with ``as_match=True``: returns the raw match object."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
       ``unquote_results`` is ``False``.

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace source text -> the whitespace character it denotes
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ) -> None:
        super().__init__()
        # merge camelCase and snake_case spellings; the boolean flags default
        # to True, so either spelling being False turns the feature off
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of the end quote that is not followed
            # by the remainder of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # NOTE: deliberately a plain raw string, not an f-string -
                    # inside an f-string the {3}/{2}/{4} quantifiers would be
                    # evaluated as replacement fields and emitted as the
                    # literal digits 3/2/4, so numeric escapes never matched
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as err:
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to Regex"
            ) from err

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        """Describe the expression by its opening and closing quote strings."""
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``.

        Returns ``(new_loc, text)``.  When ``unquote_results`` is set, the
        delimiters are stripped, escape sequences are resolved and any
        ``esc_quote`` sequence is replaced by the end quote string; otherwise
        the raw matched text (including quotes) is returned.

        Raises ``ParseException`` if no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        if instring[loc] == self.first_quote_char:
            result = self.re_match(instring, loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        def convert_escaped_numerics(s: str) -> str:
            # ``s`` is the escape body without its leading backslash
            if s == "0":
                return "\0"
            if s.isdigit() and len(s) == 3:
                return chr(int(s, base=8))
            elif s.startswith(("u", "x")):
                return chr(int(s[1:], base=16))
            else:
                return s

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Build the default ``!W:(...)`` name, truncating long exclusion strings."""
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a run of characters outside the excluded set, starting at ``loc``.

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` if the
        first character is excluded or the run is shorter than ``minLen``.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit:
            if instring[loc] in excluded:
                break
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display labels used to build a readable default name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws
        # only the known whitespace characters that are NOT being matched
        # remain skippable before this expression
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        # self.leave_whitespace()
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Build the default name from the label of each matched character.

        Characters without an entry in ``whiteStrs`` (for example ``"\\v"``)
        are shown as ``<U+XXXX>`` instead of raising ``KeyError``, which would
        otherwise abort construction because ``__init__`` reads ``self.name``.
        """
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a run of the configured whitespace characters at ``loc``.

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` if the
        first character is not matchable or the run is shorter than ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class PositionToken(Token):
    """Common base for the position-testing tokens defined below it.

    Flags the expression as able to match without consuming input and as
    not needing IndexError protection.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorables and whitespace until the target column (or non-space text) is reached."""
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        while loc < end:
            # stop at the first non-space character or once the column is hit
            if not instring[loc].isspace() or col(loc, instring) == target:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column.

        Raises ``ParseException`` if ``loc`` is already past the target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        end = loc + self.col - current
        return end, instring[loc:end]
class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the full whitespace set before newline is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Skip non-newline whitespace, then step over newlines if they were originally skippable."""
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" in self.orig_whiteChars:
            # slicing (rather than indexing) is safe at end of string
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1; otherwise raise ``ParseException``."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # newline must not be skipped as whitespace, since it is what is matched
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc`` or the end of the string.

        Returns ``(loc + 1, "\\n")`` for a newline and ``(loc + 1, [])`` at
        the end of the string; raises ``ParseException`` otherwise.
        """
        end = len(instring)
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == end:
            return loc + 1, []
        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or where only skippable text precedes ``loc``."""
        if loc == 0:
            return loc, []

        # see if entire string up to here is just whitespace and ignoreables
        if loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, []
class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or beyond the end of the string.

        Exactly at the end the returned location is ``loc + 1``; beyond the
        end it is ``loc`` unchanged.  Raises ``ParseException`` when input
        remains.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        next_loc = loc + 1 if loc == end else loc
        return next_loc, []
class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # the camelCase keyword wins only when it was changed from its default
        chosen = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(chosen)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or where a word character follows a non-word character."""
        if loc == 0:
            return loc, []

        word_chars = self.wordChars
        # evaluation order matters: the look-behind test short-circuits the
        # look-ahead index
        if instring[loc - 1] in word_chars or instring[loc] not in word_chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """
    # NOTE: the docstring is a raw string so that ``\b`` stays a literal
    # backslash-b instead of being compiled into a backspace character.

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        """
        :param word_chars: characters considered part of a word
        :param wordChars: pre-PEP8 spelling of ``word_chars``; it takes
            precedence whenever it differs from ``printables``
        """
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        # the character right after the word must be inspected as-is
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return ``(loc, [])`` if ``loc`` is a word end, else raise
        :class:`ParseException`.

        A position at or beyond the end of ``instring`` always matches.
        Elsewhere the current character must be outside ``wordChars`` and the
        previous character inside it.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to ``True``.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        # what gets stored in the results, and under which name
        self.tag_name = tag_name
        self.tag_value = value
        # parser bookkeeping flags
        self._may_return_empty = True
        self.mayIndexError = False
        self.show_in_diagram = False
        self.leave_whitespace()
        # the tag is written into the results by a parse action
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults) -> None:
        """Parse action: store ``tag_value`` in ``tokens`` under ``tag_name``."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepts a generator, a single string, a single
        :class:`ParserElement`, any iterable (strings inside it are wrapped
        with ``_literalStringClass``), or as a last resort any object, which
        is stored as a one-element list if it cannot be iterated.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if any(isinstance(expr, str_type) for expr in exprs):
                exprs = (
                    self._literalStringClass(e) if isinstance(e, str_type) else e
                    for e in exprs
                )
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Append ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached default name no longer reflects the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the originals are left unmodified
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so the originals are left unmodified
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained expression.

        A :class:`Suppress` that is already in ``self.ignoreExprs`` is not
        registered a second time. Returns ``self``.
        """
        if not isinstance(other, Suppress) or other not in self.ignoreExprs:
            super().ignore(other)
            # propagate the entry the base class just appended
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this expression and its children (once), flattening
        same-class nesting where that is safe. Returns ``self``.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates every
        contained expression, then runs ``_checkRecursion``.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are themselves copies."""
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning (when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and not suppressed) if a contained expression already has one.
        """
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, list_all_matches)

        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning per call is enough
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element: once ``And.parseImpl`` passes one of these, failures
        # of later elements are re-raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence from ``exprs_arg``.

        Plain ``str`` items are wrapped with ``_literalStringClass``; an
        ``Ellipsis`` item is replaced by a ``SkipTo`` targeting the item that
        follows it. Raises ``Exception`` if the sequence ends with ``...``.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            tmp: list[ParserElement] = []
            for cur_expr, next_expr in zip(exprs, exprs[1:]):
                if cur_expr is Ellipsis:
                    tmp.append(SkipTo(next_expr)("_skipped*"))
                else:
                    tmp.append(cur_expr)

            # the last element was never paired as ``cur_expr``; keep it as-is
            exprs[:-1] = tmp

        super().__init__(exprs, savelist)
        if self.exprs:
            # the sequence can be empty only if every element can be empty
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling is taken from the first element, unless that
            # element is a White instance
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self._may_return_empty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips into their following element, run the base
        streamlining, attach ``parent_anchor`` parse actions for any
        ``IndentedBlock`` found, and recompute ``_may_return_empty``.
        """
        # collapse any _PendingSkip's
        if self.exprs and any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in self.exprs[:-1]
        ):
            # unique sentinel marking slots that were merged into a neighbor
            deleted_expr_marker = NoMatch()
            # NOTE(review): this iterates over a snapshot slice, while the
            # marker is written into ``self.exprs`` -- so the ``is marker``
            # test below looks at the snapshot; confirm this is intended.
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # ``cur_=cur`` binds the block at definition time, so the
                    # action is unaffected by later rebinding of ``cur``
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                # only the first sub-expression is followed
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating tokens.

        Returns ``(loc, resultlist)``. After an ``_ErrorStop`` element has
        been passed, a ``ParseBaseException`` or ``IndexError`` from a later
        element is raised as ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # exact type comparison, not isinstance
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """``self += other``: append ``other`` in place (strings are wrapped
        with ``_literalStringClass``); returns ``NotImplemented`` for
        anything that is not a ParserElement.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop at the first element that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (``inner[0 :: len(inner) - 1]`` picks the first and last characters)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            # can be empty if any alternative can; skips whitespace only if
            # every alternative does
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self._may_return_empty = True

    def streamline(self) -> ParserElement:
        """Streamline via the base class, then recompute the flags derived
        from the alternatives. Returns ``self``.
        """
        super().streamline()
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            # a White alternative disables whitespace skipping for the whole Or
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        First pass: each alternative is tried with ``try_parse``; the end
        locations of the successes are collected. Second pass: the successes
        are re-parsed for real, longest first. If nothing matches, a collected
        ``ParseFatalException`` is raised in preference to the furthest
        ``ParseException``.
        """
        maxExcLoc = -1
        maxException = None
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error discards any ordinary exception tracked so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # ordinary failures are not tracked once a fatal one was seen
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # best (end location, tokens) seen so far in the second pass
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    # matched at least as far as in the first pass: take it
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie at the top, re-sort so the
                # element with the longer string form comes first
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if maxExcLoc == parse_start_loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """``self ^= other``: append ``other`` in place (strings are wrapped
        with ``_literalStringClass``); returns ``NotImplemented`` for
        anything that is not a ParserElement.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled
        and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        # can be empty if any alternative can; skips whitespace only if
        # every alternative does
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the
        alternatives. Returns ``self``.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        # a White alternative disables whitespace skipping for the whole set
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` propagates immediately. If every alternative
        fails, the failure that got furthest into the input is raised; with no
        alternatives at all a generic ``ParseException`` is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as fatal:
                fatal.__traceback__ = None
                fatal.parser_element = alt
                raise
            except ParseException as exc:
                if exc.loc > furthest_loc:
                    furthest_exc, furthest_loc = exc, exc.loc
            except IndexError:
                text_len = len(instring)
                if text_len > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = text_len

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """``self |= other``: append ``other`` in place (strings are wrapped
        with ``_literalStringClass``); returns ``NotImplemented`` for
        anything that is not a ParserElement.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " | ".join(str(e) for e in self.exprs)
        return f"{{{joined}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled
        and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warnings.warn(
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group",
                stacklevel=3,
            )

        return super()._setResultsName(name, list_all_matches)
class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            # can be empty only if every member can be empty
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self._may_return_empty = True
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built on its first call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """``self &= other``: append ``other`` in place (strings are wrapped
        with ``_literalStringClass``); returns ``NotImplemented`` for
        anything that is not a ParserElement.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Each([self, other])

    def streamline(self) -> ParserElement:
        """Streamline via the base class and recompute ``_may_return_empty``."""
        super().streamline()
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self._may_return_empty = True
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expressions in whatever order the input has them.

        A trial phase repeatedly sweeps the remaining expressions with
        ``try_parse`` to discover a match order; the expressions are then
        parsed for real in that order and their results concatenated.

        Raises the furthest ``ParseFatalException`` recorded in the final
        sweep, or a ``ParseException`` naming the required expressions that
        never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            # maps id() of an Opt's inner expression back to the Opt itself
            self.opt1map = dict(
                (id(e.expr), e) for e in self.exprs if isinstance(e, Opt)
            )
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        # per-call working copies; the cached groups are not mutated
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        multis = self.multioptionals[:]
        matchOrder: list[ParserElement] = []

        keepMatching = True
        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + multis
            failed.clear()
            fatals.clear()
            for e in tmpExprs:
                try:
                    tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the enclosing Opt, if any, rather than its inner expr
                    matchOrder.append(self.opt1map.get(id(e), e))
                    # required/optional members match once; ``multis`` entries
                    # are never removed and may match again on a later sweep
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop after a sweep in which nothing matched
            if len(failed) == len(tmpExprs):
                keepMatching = False

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie at the top, re-sort so the
                # element with the longer string form comes first
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if tmpReqd:
            missing = ", ".join([str(e) for e in tmpReqd])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt]

        # real parse, in the discovered order, starting from the original loc
        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return f"{{{' & '.join(str(e) for e in self.exprs)}}}"
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        """Wrap ``expr``; a string is first converted to a parser element.

        When ``expr`` is not ``None`` the wrapper takes over its whitespace
        settings, index-error / empty-match flags, ``saveAsList``,
        ``callPreparse`` and ignore expressions.
        """
        super().__init__(savelist)
        if isinstance(expr, str_type):
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or ``[]`` if there is none."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the wrapped expression.

        Raises :class:`ParseException` when no expression is set. Other parse
        failures are re-raised after their empty ``pstr`` / ``loc`` /
        ``parser_element`` fields are filled in; ``ParseSyntaxException`` is
        passed through untouched.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as exc:
            exc.pstr = exc.pstr or instring
            exc.loc = exc.loc or loc
            exc.parser_element = exc.parser_element or self
            # a custom-named, non-Forward wrapper reports its own message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                exc.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``leave_whitespace`` here and, if ``recursive``, to a copy
        of the wrapped expression. Returns ``self``.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``ignore_whitespace`` here and, if ``recursive``, to a copy
        of the wrapped expression. Returns ``self``.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the wrapped expression.

        A :class:`Suppress` that is already in ``self.ignoreExprs`` is not
        registered a second time. Returns ``self``.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then the wrapped expression."""
        super().streamline()
        inner = self.expr
        if inner is not None:
            inner.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        if self.expr is not None:
            self.expr._checkRecursion(parseElementList[:] + [self])

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates the wrapped
        expression, then runs ``_checkRecursion``.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = (validateTrace if validateTrace is not None else [])[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class IndentedBlock(ParseElementEnhance):
    """
    Match one or more repetitions of an expression that all begin at the
    same column.

    Intended for text whose structure is expressed through indentation
    (Python source code being the typical example).

    Parameters:

    - ``expr`` - expression to match on each line of the block
    - ``recursive`` - (default= ``False``) if true, each matched expression
      may be followed by a nested, more deeply indented block
    - ``grouped`` - (default= ``True``) if true, the block's tokens are
      wrapped in a :class:`Group`
    """

    class _Indent(Empty):
        """Zero-width match that succeeds only at column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda text, pos, toks: col(pos, text) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width match that succeeds only to the right of ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda text, pos, toks: col(pos, text) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; column 1 for an outermost block
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Let an Empty() skip leading whitespace; the position it stops at
        # fixes the column every peer line of this block must start at.
        anchor_loc = Empty().preParse(instring, loc)

        # Fail fast: if the expression does not match here this call raises
        # and none of the grammar below needs to be built.
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_column = self._Indent(indent_col)

        line_expr = Empty() + same_column + self.expr
        if self._recursive:
            deeper_column = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper_column + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        # after the block, optionally consume the return to the parent's
        # column (or the end of the input)
        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Opt(trailing_undent)).parseImpl(instring, anchor_loc, do_actions)
class AtStringStart(ParseElementEnhance):
    """Match the given expression only when parsing starts at location 0
    of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # only location 0 qualifies as the start of the string
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")
class AtLineStart(ParseElementEnhance):
    r"""Match the given expression only when the current location is in
    column 1 of a line of the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # column numbers are 1-based, so column 1 is the start of a line
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.

    ``FollowedBy`` checks that the expression matches at the current
    position but does *not* advance the parsing position. The returned
    token list is always empty; results names defined inside the lookahead
    expression are kept and remain accessible by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Parse the lookahead expression, then empty the list part of its
        # results in place: the positional tokens are removed while the
        # results object itself is what gets returned.
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)
        found = lookahead[1]
        del found[:]

        # return the original location: lookahead never consumes input
        return loc, found
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.

    ``PrecededBy`` checks that the expression matches immediately before
    the current position and does not advance the parsing position. The
    returned token list is always empty, but a results name defined on the
    given expression is still returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to look behind, counted back from the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False
        self.skipWhitespace = False

        # Work out whether the lookbehind distance is known from the
        # expression itself; if so it overrides the ``retreat`` argument.
        lookbehind, is_exact = retreat, True
        if isinstance(expr, str_type):
            lookbehind = len(typing.cast(str, expr))
        elif isinstance(expr, (Literal, Keyword)):
            lookbehind = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            lookbehind = expr.maxLen
        elif isinstance(expr, PositionToken):
            lookbehind = 0
        else:
            is_exact = False

        self.exact = is_exact
        self.retreat = lookbehind
        self.errmsg = f"not preceded by {expr}"
        # drop the matched tokens: a lookbehind contributes no list items
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        if self.exact:
            # fixed distance: step back exactly ``retreat`` characters
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            begin = loc - self.retreat
            _, found = self.expr._parse(instring, begin)
            return loc, found

        # ``retreat`` is only an upper bound: try successively longer
        # suffixes of the window that ends at ``loc``; the expression must
        # match all the way to the end of that window.
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        failure: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        # NOTE(review): when loc > retreat the last offset is retreat + 1,
        # one more than len(window), giving a negative start index - confirm
        # this final attempt is intended.
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, found = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                failure = pbe
                continue
            return loc, found

        # no suffix matched: report the most recent failure
        raise failure
class Located(ParseElementEnhance):
    """
    Wrap the tokens of the contained expression together with the
    locations in the input string where the match began and ended.

    The returned results carry these names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end, matched = self.expr._parse(instring, begin, do_actions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        for key, value in (
            ("locn_start", begin),
            ("value", matched),
            ("locn_end", end),
        ):
            located[key] = value

        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]
class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.

    ``NotAny`` succeeds only when the expression does *not* match at the
    current position. It does *not* advance the parsing position, does
    *not* skip over leading whitespace, and always returns an empty token
    list. May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expressions
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        unwanted_present = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        return f"~{{{self.expr}}}"
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repetition: matches the contained expression
    one or more times, optionally stopping when a sentinel expression is
    seen.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        self.saveAsList = True
        # the camelCase keyword wins when both spellings are supplied
        sentinel = stopOn or stop_on
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """
        Set (or clear, with ``None``) the sentinel expression that ends the
        repetition. Strings are converted to literal expressions.
        Returns ``self``.
        """
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated: parsing it succeeds only when the sentinel is absent
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # bind the callables used in the loop to locals
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_ender = None if self.not_ender is None else self.not_ender.try_parse

        # There must be at least one match; but if we are positioned at the
        # stop sentinel, fail before even trying.
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)

        has_ignorables = bool(self.ignoreExprs)
        try:
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition simply ends the sequence
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        warning_enabled = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        )
        if warning_enabled:
            # warn (once) if any contained expression already has a results name
            for e in [self.expr] + self.expr.recurse():
                if not isinstance(e, ParserElement):
                    continue
                if not e.resultsName:
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in e.suppress_warnings_
                ):
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                break

        return super()._setResultsName(name, list_all_matches)
class OneOrMore(_MultipleMatch):
    """
    Match the given expression one or more times in succession.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        # rendered as the contained expression in braces followed by "..."
        inner = self.expr
        return f"{{{inner}}}..."
class ZeroOrMore(_MultipleMatch):
    """
    Match the given expression zero or more times in succession.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # zero matches is a success: turn the "no first match" failure of
        # the base class into an empty result at the original location
        try:
            result = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            return loc, ParseResults([], name=self.resultsName)
        return result

    def _generateDefaultName(self) -> str:
        inner = self.expr
        return f"[{inner}]..."
class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Parameters:

    - ``expr`` - expression for a single list element (a string is
      converted to a literal expression)
    - ``delim`` - (default= ``","``) delimiter between elements
    - ``combine`` - (default= ``False``) return the match as one combined
      token, delimiters included
    - ``min`` - (default= ``None``) minimum number of elements; ``None``
      means 1
    - ``max`` - (default= ``None``) maximum number of elements; ``None``
      means unbounded
    - ``allow_trailing_delim`` - (default= ``False``) accept a delimiter
      after the last element

    Raises ``ValueError`` if ``min`` is less than 1, or if ``max`` is less
    than the effective minimum (``min``, or 1 when ``min`` is omitted).

    Example::

        DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        # An omitted min means "at least one element", so max has to be
        # checked against that effective minimum too. Otherwise max=0 with
        # no min would build the repetition range (0, -1) below.
        effective_min = min or 1
        if max is not None and max < effective_min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.delim = delim
        self.combine = combine
        if not combine:
            # delimiters are matched but left out of the results
            self.delim = Suppress(delim)
        self.min = effective_min
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # first element, then (delimiter + element) repeated; the counts
        # are reduced by one because the first element is matched separately
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        content_expr = self.content.streamline()
        return f"{content_expr} [{self.raw_delim} {content_expr}]..."
class _NullToken:
    """Placeholder object that is falsy and renders as an empty string."""

    def __bool__(self) -> bool:
        return False

    def __str__(self) -> str:
        return ""
class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default supplied" (lets None be a real default)
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                # no default configured: contribute nothing
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                # keep the default reachable under the inner results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        text = str(self.expr)
        # strip off redundant enclosing {}'s
        while len(text) > 1 and text[0] + text[-1] == "{}":
            text = text[1:-1]
        return f"[{text}]"
# ``Optional`` is a second name bound to the same class object as ``Opt``.
Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped test; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        # the camelCase keyword wins when both spellings are supplied
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper element whose only job is to hold the ignore expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        """
        Register an ignorable expression, rebuild the internal ignorer to
        include it, and return ``self`` so the call can be chained like
        ``ignore()`` on other elements.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this leaves the loop without raising, so the
                # text skipped so far is still returned - confirm against
                # the documented "is not a match" wording for fail_on.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: actions are not run while searching
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real (with actions) and append its tokens
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
5595class Forward(ParseElementEnhance):
5596 """
5597 Forward declaration of an expression to be defined later -
5598 used for recursive grammars, such as algebraic infix notation.
5599 When the expression is known, it is assigned to the ``Forward``
5600 variable using the ``'<<'`` operator.
5602 Note: take care when assigning to ``Forward`` not to overlook
5603 precedence of operators.
5605 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::
5607 fwd_expr << a | b | c
5609 will actually be evaluated as::
5611 (fwd_expr << a) | b | c
5613 thereby leaving b and c out as parseable alternatives. It is recommended that you
5614 explicitly group the values inserted into the ``Forward``::
5616 fwd_expr << (a | b | c)
5618 Converting to use the ``'<<='`` operator instead will avoid this problem.
5620 See :class:`ParseResults.pprint` for an example of a recursive
5621 parser created using ``Forward``.
5622 """
def __init__(
    self, other: typing.Optional[Union[ParserElement, str]] = None
) -> None:
    """
    Create a forward declaration, optionally with its expression already
    supplied as ``other``.
    """
    # Remember where this Forward was created; __del__ reads the file name
    # and line number from this frame when it issues its warning.
    # The stack must be sampled directly in this method.
    recent_frames = traceback.extract_stack(limit=2)
    self.caller_frame = recent_frames[0]
    super().__init__(other, savelist=False)  # type: ignore[arg-type]
    # source line of the most recent '<<' assignment, none so far
    self.lshift_line = None
def __lshift__(self, other) -> Forward:
    """
    Attach ``other`` as the expression of this ``Forward``.

    A string is first converted to a literal expression. If ``other`` is
    not a ``ParserElement`` after that, ``NotImplemented`` is returned and
    this object is left completely unchanged. On success, several
    attributes of the attached expression are copied onto this object and
    ``self`` is returned.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)

    if not isinstance(other, ParserElement):
        return NotImplemented

    # Drop the creation-site record only once the assignment is known to be
    # valid: after a rejected operand this Forward is still empty, and
    # __del__ reads caller_frame when it warns about an empty Forward.
    if hasattr(self, "caller_frame"):
        del self.caller_frame

    self.expr = other
    self.streamlined = other.streamlined
    self.mayIndexError = self.expr.mayIndexError
    self._may_return_empty = self.expr.mayReturnEmpty
    self.set_whitespace_chars(
        self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
    )
    self.skipWhitespace = self.expr.skipWhitespace
    self.saveAsList = self.expr.saveAsList
    self.ignoreExprs.extend(self.expr.ignoreExprs)
    # remember the calling line so __or__ can spot `fwd << a | b`
    self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
    return self
def __ilshift__(self, other) -> Forward:
    """
    Implement ``fwd <<= other`` by delegating to ``<<`` when ``other`` is a
    ``ParserElement``; any other operand yields ``NotImplemented``.
    """
    if isinstance(other, ParserElement):
        return self << other
    return NotImplemented
def __or__(self, other) -> ParserElement:
    """
    Combine with ``other`` using the base-class ``|`` operator, first
    warning (when the diagnostic is enabled and not suppressed) if this
    ``|`` appears on the same source line as the preceding ``<<``.
    """
    # the stack must be sampled directly in this method
    caller_line = traceback.extract_stack(limit=2)[-2]
    same_line_as_lshift = (
        __diag__.warn_on_match_first_with_lshift_operator
        and caller_line == self.lshift_line
        and Diagnostics.warn_on_match_first_with_lshift_operator
        not in self.suppress_warnings_
    )
    if same_line_as_lshift:
        warnings.warn(
            "warn_on_match_first_with_lshift_operator:"
            " using '<<' operator with '|' is probably an error, use '<<='",
            stacklevel=2,
        )
    return super().__or__(other)
def __del__(self):
    """
    Warn when a ``Forward`` is discarded without ever having had an
    expression attached, e.g. because the variable was rebound with '='
    instead of '<<=' or '<<'. The warning is attributed to the file and
    line recorded in ``caller_frame``.
    """
    if self.expr is not None:
        return
    if not __diag__.warn_on_assignment_to_Forward:
        return
    if Diagnostics.warn_on_assignment_to_Forward in self.suppress_warnings_:
        return

    origin = self.caller_frame
    warnings.warn_explicit(
        "warn_on_assignment_to_Forward:"
        " Forward defined here but no expression attached later using '<<=' or '<<'",
        UserWarning,
        filename=origin.filename,
        lineno=origin.lineno,
    )
def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """
    Parse the attached expression at ``loc``.

    If no expression is attached and the ``warn_on_parse_using_empty_Forward``
    diagnostic is enabled and not suppressed, a warning is issued first.

    When ``ParserElement._left_recursion_enabled`` is false, parsing is
    delegated to the base-class ``parseImpl``. Otherwise the bounded
    recursion algorithm described in the comments below runs while holding
    ``ParserElement.recursion_lock``, using ``ParserElement.recursion_memos``
    keyed by ``(loc, self, do_actions)``.

    Returns the ``(location, results)`` pair of the best match found, and
    re-raises the ``ParseException`` if no match was obtained.
    """
    if (
        self.expr is None
        and __diag__.warn_on_parse_using_empty_Forward
        and Diagnostics.warn_on_parse_using_empty_Forward
        not in self.suppress_warnings_
    ):
        # walk stack until parse_string, scan_string, search_string, or transform_string is found
        parse_fns = (
            "parse_string",
            "scan_string",
            "search_string",
            "transform_string",
        )
        tb = traceback.extract_stack(limit=200)
        # innermost frame first; i counts frames back from this one
        for i, frm in enumerate(reversed(tb), start=1):
            if frm.name in parse_fns:
                stacklevel = i + 1
                break
        else:
            # no public parse entry point found on the stack
            stacklevel = 2
        warnings.warn(
            "warn_on_parse_using_empty_Forward:"
            " Forward expression was never assigned a value, will not parse any input",
            stacklevel=stacklevel,
        )
    if not ParserElement._left_recursion_enabled:
        return super().parseImpl(instring, loc, do_actions)
    # ## Bounded Recursion algorithm ##
    # Recursion only needs to be processed at ``Forward`` elements, since they are
    # the only ones that can actually refer to themselves. The general idea is
    # to handle recursion stepwise: We start at no recursion, then recurse once,
    # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
    #
    # The "trick" here is that each ``Forward`` gets evaluated in two contexts
    # - to *match* a specific recursion level, and
    # - to *search* the bounded recursion level
    # and the two run concurrently. The *search* must *match* each recursion level
    # to find the best possible match. This is handled by a memo table, which
    # provides the previous match to the next level match attempt.
    #
    # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
    #
    # There is a complication since we not only *parse* but also *transform* via
    # actions: We do not want to run the actions too often while expanding. Thus,
    # we expand using `do_actions=False` and only run `do_actions=True` if the next
    # recursion level is acceptable.
    with ParserElement.recursion_lock:
        memo = ParserElement.recursion_memos
        try:
            # we are parsing at a specific recursion expansion - use it as-is
            prev_loc, prev_result = memo[loc, self, do_actions]
            # a memoized failure is stored as the exception itself
            if isinstance(prev_result, Exception):
                raise prev_result
            return prev_loc, prev_result.copy()
        except KeyError:
            act_key = (loc, self, True)
            peek_key = (loc, self, False)
            # we are searching for the best recursion expansion - keep on improving
            # both `do_actions` cases must be tracked separately here!
            # seed the memo with a failure located just before ``loc``, so
            # any real match compares as an improvement
            prev_loc, prev_peek = memo[peek_key] = (
                loc - 1,
                ParseException(
                    instring, loc, "Forward recursion without base case", self
                ),
            )
            if do_actions:
                memo[act_key] = memo[peek_key]
            while True:
                try:
                    new_loc, new_peek = super().parseImpl(instring, loc, False)
                except ParseException:
                    # we failed before getting any match - do not hide the error
                    if isinstance(prev_peek, Exception):
                        raise
                    # otherwise fall back to the previous match
                    new_loc, new_peek = prev_loc, prev_peek
                # the match did not get better: we are done
                if new_loc <= prev_loc:
                    if do_actions:
                        # replace the match for do_actions=False as well,
                        # in case the action did backtrack
                        prev_loc, prev_result = memo[peek_key] = memo[act_key]
                        # remove this search's entries before returning
                        del memo[peek_key], memo[act_key]
                        return prev_loc, copy.copy(prev_result)
                    del memo[peek_key]
                    return prev_loc, copy.copy(prev_peek)
                # the match did get better: see if we can improve further
                if do_actions:
                    try:
                        memo[act_key] = super().parseImpl(instring, loc, True)
                    except ParseException as e:
                        # record the failure under both keys, then propagate it
                        memo[peek_key] = memo[act_key] = (new_loc, e)
                        raise
                prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek
def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """Turn off whitespace skipping for this element and return ``self``.

    ``recursive`` is accepted for signature compatibility but is not
    consulted here: only this element's own ``skipWhitespace`` flag is
    changed, the contained expression is left untouched.
    """
    self.skipWhitespace = False
    return self
def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """Turn on whitespace skipping for this element and return ``self``.

    ``recursive`` is accepted for signature compatibility but is not
    consulted here: only this element's own ``skipWhitespace`` flag is
    changed, the contained expression is left untouched.
    """
    self.skipWhitespace = True
    return self
def streamline(self) -> ParserElement:
    """Streamline this element (at most once) and return ``self``.

    The first call marks the element as streamlined and then streamlines
    the contained expression, if one has been assigned. Later calls are
    no-ops.
    """
    if self.streamlined:
        return self

    # Set the flag before descending: a re-entrant call that reaches this
    # element again then returns immediately instead of recursing.
    self.streamlined = True
    contained = self.expr
    if contained is not None:
        contained.streamline()
    return self
def validate(self, validateTrace=None) -> None:
    """Deprecated validation hook.

    Always emits a :class:`DeprecationWarning`. Unless this element is
    already present in ``validateTrace``, the contained expression (if any)
    is validated with this element appended to a copy of the trace.
    Finally ``_checkRecursion([])`` is invoked on this element.

    :param validateTrace: elements already visited; ``None`` means an
        empty trace.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        DeprecationWarning,
        stacklevel=2,
    )
    trace = [] if validateTrace is None else validateTrace

    if self not in trace:
        # extend a copy so the caller's list is never modified
        extended_trace = trace[:] + [self]
        contained = self.expr
        if contained is not None:
            contained.validate(extended_trace)
    self._checkRecursion([])
def _generateDefaultName(self) -> str:
    """Build the default display name ``"<ClassName>: <expr>"``.

    The contained expression is rendered with ``str()`` and truncated to
    1000 characters; ``"None"`` is used when no expression is assigned and
    ``"..."`` when rendering raises an :class:`Exception`.

    While the contained expression is being rendered, ``_defaultName`` is
    temporarily set to ``": ..."`` so that a recursive grammar that reaches
    this element again does not recurse forever. The previous value is
    restored in a ``finally`` clause, so it is put back even if rendering is
    aborted by something that is not an ``Exception`` subclass.
    """
    # Avoid infinite recursion by setting a temporary _defaultName
    saved_default_name = self._defaultName
    self._defaultName = ": ..."

    # Use the string representation of main expression.
    try:
        if self.expr is not None:
            ret_string = str(self.expr)[:1000]
        else:
            ret_string = "None"
    except Exception:
        ret_string = "..."
    finally:
        self._defaultName = saved_default_name

    return f"{type(self).__name__}: {ret_string}"
def copy(self) -> ParserElement:
    """Return a copy of this element.

    When an expression has already been assigned, the inherited ``copy()``
    is used. Otherwise a fresh ``Forward`` is created and this element is
    assigned to it with ``<<=``.
    """
    if self.expr is None:
        # NOTE(review): presumably this lets the copy pick up whatever
        # expression is assigned to the original later on - confirm.
        placeholder = Forward()
        placeholder <<= self
        return placeholder
    return super().copy()
def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """Assign a results name, warning first if this element is still empty.

    A warning is issued when the ``warn_name_set_on_empty_Forward``
    diagnostic is enabled, has not been suppressed on this element, and no
    expression has been assigned yet. The actual work is then delegated to
    the inherited implementation.
    """
    # fmt: off
    should_warn = (
        __diag__.warn_name_set_on_empty_Forward
        and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
        and self.expr is None
    )
    if should_warn:
        warnings.warn(
            "warn_name_set_on_empty_Forward:"
            f" setting results name {name!r} on {type(self).__name__} expression"
            " that has no contained expression",
            stacklevel=3,
        )
    # fmt: on

    return super()._setResultsName(name, list_all_matches)
# Compatibility synonyms: pre-PEP8 camelCase names, wrapped by
# replaced_by_pep8() around the snake_case methods defined above.
# fmt: off
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
# fmt: on
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        """Wrap ``expr``.

        ``savelist`` is accepted but not forwarded to the base class;
        ``saveAsList`` is always reset to ``False`` here and subclasses
        that need list results set it themselves.
        """
        super().__init__(expr)  # , savelist)
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter that joins all matched tokens into a single string.

    By default the matched pieces must also be contiguous in the input
    string; pass ``adjacent=False`` to the constructor to allow intervening
    whitespace.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        """
        :param expr: expression whose tokens are combined
        :param join_string: separator handed to the token list when it is
            flattened to strings in :meth:`postParse`
        :param adjacent: require the matched pieces to be contiguous
        :param joinString: pre-PEP8 spelling of ``join_string``; takes
            precedence when it is not ``None``
        """
        super().__init__(expr)
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but
        # re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression and return ``self``.

        For an adjacent ``Combine`` the base ``ParserElement.ignore`` is
        called directly; otherwise the inherited implementation is used.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their single concatenated string."""
        combined_text = "".join(tokenlist._asStringList(self.joinString))

        # start from a copy of the incoming results, emptied of its tokens
        combined = tokenlist.copy()
        del combined[:]
        combined += ParseResults([combined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
class Group(TokenConverter):
    """Converter that returns the matched tokens as a nested list - handy
    for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    When the optional ``aslist`` argument is True, the parsed tokens are
    returned as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in a list (or a ``ParseResults.List``)."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also
    as a dictionary. Each element can additionally be looked up using the
    first token of the element as its key. Useful for tabular report
    scraping when the first column can be used as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register every non-empty entry of ``tokenlist`` under its first token.

        The value stored for a key depends on the size of the entry:

        - one token: the empty string
        - two tokens, the second not a ``ParseResults``: the second token
        - anything else: a copy of the entry without its first token,
          collapsed to its only element when exactly one remains and the
          remainder carries no named results

        :raises TypeError: if an entry cannot be copied
        """
        for index, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if size == 1:
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                is_single_unnamed = len(remainder) == 1 and not (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder[0] if is_single_unnamed else remainder

            tokenlist[key] = _ParseResultsWithOffset(value, index)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        """Wrap ``expr``; ``...`` is replaced by a pending-skip placeholder.

        ``savelist`` is accepted but not used here.
        """
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> ParserElement:
        """``self + other``; a pending skip becomes a suppressed ``SkipTo(other)``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        """``self - other``; a pending skip becomes a suppressed ``SkipTo(other)``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressing, so return ``self`` unchanged."""
        return self
# XXX: Example needs to be re-done for updated output
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each time the wrapped parse action is called, a line of the form
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written to ``sys.stderr``. When the parse action finishes, a line
    starting with ``"<<leaving"`` is written, showing either the returned
    value or the type and message of the exception that was raised (the
    exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']

    .. versionchanged:: 3.1.0
        Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are (instring, loc, tokens)
        instring, loc, tokens = paArgs[-3:]
        label = f.__name__
        if len(paArgs) > 3:
            # an extra leading argument is present: prefix its type name
            label = f"{type(paArgs[0]).__name__}.{label}"
        sys.stderr.write(
            f">>entering {label}(line: {line(loc, instring)!r}, {loc}, {tokens!r})\n"
        )
        try:
            result = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {result!r})\n")
            return result

    z.__name__ = f.__name__
    return z
# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Mini-grammar used by srange() to parse a regex-style "[...]" character set.

# backslash-escaped punctuation: keep only the character after the backslash
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# Hex escape: "\x41", "\X41", or the legacy "\0x41" form. The hex digits are
# whatever follows the x/X marker. str.lstrip(r"\0x") must not be used to
# find them: it strips a *set* of characters, so it also removes leading "0"
# digits ("\x00" would become "" and int() would fail) and leaves an
# uppercase "X" in place. Hex digits never contain "x", so splitting the
# lower-cased match on its last "x" is unambiguous.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().rpartition("x")[2], 16))
)
# octal escape: "\041"; drop the backslash and read the rest in base 8
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one character of the set body: an escape, or a literal other than "\" and "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped so srange() can tell it apart from a single char
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be parsed or expanded, an empty string is returned.
    """

    def _expand(part):
        # a grouped (ParseResults) part is a "first-last" range; anything
        # else is a single character that is used as-is
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return "".join(map(chr, range(first, last + 1)))

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(_expand(part) for part in body)
    except Exception:
        # best effort: any failure yields an empty character set
        return ""
def token_map(func, *args) -> ParseAction:
    """Build a parse action that applies ``func`` to every element of a
    :class:`ParseResults` list. Any extra ``args`` are passed to ``func``
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed data to an integer using base 16.

    The returned parse action takes its ``__name__`` from ``func`` (or from
    the class of ``func`` when it has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(instring, loc, tokens):
        return [func(token, *args) for token in tokens]

    try:
        action_name = func.__name__
    except AttributeError:
        # callables without a __name__ (e.g. functools.partial objects)
        action_name = func.__class__.__name__
    pa.__name__ = action_name

    return pa
def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable of the *calling* frame that is a
    :class:`ParserElement` without a custom name is named after the
    variable it is bound to. Does nothing when the running Python does not
    provide ``sys._getframe``.
    """
    # guard against _getframe not being implemented in the current Python
    frame_getter = getattr(sys, "_getframe", None)
    # must be called right here: depth 1 is then the caller of this function
    caller_frame = frame_getter(1) if frame_getter is not None else None
    if caller_frame is None:
        return

    # find all locals in the calling frame that are ParserElements
    caller_frame = typing.cast(types.FrameType, caller_frame)
    for var_name, candidate in caller_frame.f_locals.items():
        if not isinstance(candidate, ParserElement):
            continue
        # if no custom name defined, set the name to the var name
        if not candidate.customName:
            candidate.set_name(var_name)
# Quoted-string expressions. Each Regex matches the opening quote and the
# string body; the closing quote is added as a separate literal and Combine
# glues the two pieces back into one token.

# double-quoted, single line; a doubled quote ("") or a backslash escape may
# appear inside the body
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# single-quoted counterpart of dbl_quoted_string
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either of the two forms above
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Python-style literals: triple-quoted (may span lines) or single-line, in
# both quote flavours; an embedded quote is written as a backslash escape
# rather than as a doubled quote.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted string with a leading "u" prefix
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets expanded by srange() from hex code point ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement bound at module level up to this point)
_builtin_exprs: list[ParserElement] = [
    candidate for candidate in vars().values() if isinstance(candidate, ParserElement)
]

# Compatibility synonyms: pre-PEP8 camelCase names. Expression constants are
# plain aliases of the same objects; functions are wrapped by replaced_by_pep8().
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on