Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 45%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# core.py
3#
4from __future__ import annotations
6import collections.abc
7from collections import deque
8import os
9import typing
10from typing import (
11 Any,
12 Callable,
13 Generator,
14 NamedTuple,
15 Sequence,
16 TextIO,
17 Union,
18 cast,
19)
20from abc import ABC, abstractmethod
21from enum import Enum
22import string
23import copy
24import warnings
25import re
26import sys
27from collections.abc import Iterable
28import traceback
29import types
30from operator import itemgetter
31from functools import wraps
32from threading import RLock
33from pathlib import Path
35from .util import (
36 _FifoCache,
37 _UnboundedCache,
38 __config_flags,
39 _collapse_string_to_ranges,
40 _escape_regex_range_chars,
41 _flatten,
42 LRUMemo as _LRUMemo,
43 UnboundedMemo as _UnboundedMemo,
44 replaced_by_pep8,
45)
46from .exceptions import *
47from .actions import *
48from .results import ParseResults, _ParseResultsWithOffset
49from .unicode import pyparsing_unicode
# Alias for sys.maxsize.
_MAX_INT = sys.maxsize
# Tuple of the str and bytes types (usable as the second argument of isinstance()).
str_type: tuple[type, ...] = (str, bytes)
54#
55# Copyright (c) 2003-2022 Paul T. McGuire
56#
57# Permission is hereby granted, free of charge, to any person obtaining
58# a copy of this software and associated documentation files (the
59# "Software"), to deal in the Software without restriction, including
60# without limitation the rights to use, copy, modify, merge, publish,
61# distribute, sublicense, and/or sell copies of the Software, and to
62# permit persons to whom the Software is furnished to do so, subject to
63# the following conditions:
64#
65# The above copyright notice and this permission notice shall be
66# included in all copies or substantial portions of the Software.
67#
68# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
69# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
70# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
71# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
72# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
73# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
74# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
75#
77from functools import cached_property
class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # Snapshot of every public name bound in the class body so far (names that
    # do not start with "_"). This line must stay BELOW all flag definitions,
    # and no non-underscore helper name may be bound above it.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # Whitespace-separated list of flag names, split into a list of str.
    # NOTE(review): how "fixed" names are treated is decided by the
    # __config_flags base class, which is not visible here - confirm there.
    _fixed_names = """
        collect_all_And_tokens
        """.split()
class __diag__(__config_flags):
    # Holder for the global diagnostic flags; every flag starts out False.
    # The public names of these flags are mirrored by the Diagnostics enum.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # Snapshot of the public names bound above (must follow the flag
    # definitions; names starting with "_" are excluded).
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # Flags whose name starts with "warn" / "enable_debug", respectively.
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Call ``cls.enable`` for every flag whose name starts with ``warn``."""
        for name in cls._warning_names:
            cls.enable(name)
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # Member names must match the flag attribute names on __diag__, because
    # enable_diag()/disable_diag() pass ``member.name`` straight through.
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    # NOTE(review): this member is not described in the docstring above;
    # presumably it warns about '<<' combined with a '|' expression - confirm
    # against the Forward implementation before documenting it.
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch on the global pyparsing diagnostic flag identified by the given
    :class:`Diagnostics` member.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch off the global pyparsing diagnostic flag identified by the given
    :class:`Diagnostics` member.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning flag (see :class:`Diagnostics`).

    Delegates to the ``enable_all_warnings`` classmethod of the diagnostics
    flag holder.
    """
    turn_on_all = __diag__.enable_all_warnings
    turn_on_all()
# hide abstract class: the imported base is only needed to define __compat__
# and __diag__ above, so its name is removed from this module's namespace
del __config_flags
def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether pyparsing's diagnostic warnings should be switched on.

    The starting answer is the truthiness of ``warn_env_var``. Each entry of
    ``cmd_line_warn_options`` is then read as a colon-separated
    ``action:message:category:module:line`` spec (missing fields are treated as
    empty) and applied in order, so later options override earlier ones:

    - an action starting with ``"i"`` (any case) whose module field is empty or
      ``"pyparsing"`` turns warnings off;
    - any other action turns warnings on when the module field is
      ``"pyparsing"``, or when message, category and module are all empty.
    """
    enabled = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with colons so that short specs still yield enough fields
        fields = (option + "::::").split(":")
        action, message, category, module = fields[:4]
        if action.lower().startswith("i"):
            if module in ("pyparsing", ""):
                enabled = False
        elif module == "pyparsing" or not (message or category or module):
            enabled = True
    return enabled
# Import-time side effect: enable all diagnostic warnings when requested through
# Python's warning options (sys.warnoptions) or a non-empty
# PYPARSINGENABLEALLWARNINGS environment variable.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()
# build list of single arg builtins, that can be used as parse actions
# (_trim_arity calls these with the parsed tokens as their only argument)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# Return type of parseImpl: (location, tokens).
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# A condition may take 0 to 3 arguments; the full form is (s, loc, toks).
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# Called as fn(s, loc, expr, err) - see ParserElement.set_fail_action.
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# Debug hooks; the trailing bool is the cache_hit flag.
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# Character-set constants, each a plain str of the characters it contains.
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])
class _ParseActionIndexError(Exception):
    """
    Internal carrier for an IndexError that escaped a user parse action.

    Wrapping keeps such errors distinguishable from IndexErrors raised inside
    ParserElement parseImpl methods, which are handled differently.
    """

    def __init__(self, msg: str, exc: BaseException) -> None:
        # msg: description of the failure; exc: the exception being wrapped
        self.msg, self.exc = msg, exc
# Lazily filled caches shared by all _trim_arity() calls:
# - _trim_arity_call_line: stack entry captured inside _trim_arity on first use
# - pa_call_line_synth: (filename, line number) of the probing call to ``func``
#   inside ``wrapper``, derived from the entry above plus LINE_DIFF
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a callable that accepts the full parse-action argument list and
    forwards only as many trailing arguments as ``func`` accepts.

    - Members of ``_single_arg_builtins`` are wrapped so that they are called
      with just the last (tokens) argument.
    - Otherwise the returned wrapper probes on first use: it calls
      ``func(*args[limit:])`` starting with ``limit = 0`` and, whenever that
      call raises a TypeError whose innermost of the first two traceback
      entries is the probing call line itself, drops one more leading argument
      (up to ``max_limit``) and retries. Once a call succeeds the discovered
      ``limit`` is reused for all later calls.
    - A TypeError raised from anywhere else, or after ``max_limit`` is
      reached, propagates unchanged.
    - An IndexError raised during probing is re-raised wrapped in
      ``_ParseActionIndexError``.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    # True when the (filename, lineno) of that entry is the
                    # probing call above, i.e. the call itself was rejected
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    # drop the local reference to the traceback object
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            # retry with one fewer leading argument
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate (a function returning ``True`` or ``False``) so that it can
    be used wherever a parse action is expected. Useful when
    :class:`ParserElement.add_condition` is not available, such as when adding a
    condition to an operator level in :class:`infix_notation`.

    Optional keyword arguments:

    - ``message`` - define a custom message to be used in the raised exception
    - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise will raise :class:`ParseException`

    The returned parse action returns nothing when the predicate holds and
    raises the selected exception when it does not.
    """
    failure_message = "failed user-defined condition" if message is None else message
    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException
    # adapt the predicate so it can always be called as fn(s, l, t)
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if bool(fn(s, l, t)):
            return
        raise failure_type(s, l, failure_message)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "about to try a match" debug hook: print the expression, the
    location (with line and column numbers), the source line, and a caret
    under the current column. Output is prefixed with ``*`` on a cache hit.
    """
    prefix = "*" if cache_hit else ""
    line_number = lineno(loc, instring)
    column = col(loc, instring)
    source_line = line(loc, instring)
    report = (
        f"{prefix}Match {expr} at loc {loc}({line_number},{column})\n"
        f" {source_line}\n"
        f" {'^':>{column}}"
    )
    print(report)
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default "match succeeded" debug hook: print the expression and the matched
    tokens as a list, prefixed with ``*`` on a cache hit.
    """
    prefix = "*" if cache_hit else ""
    matched = toks.as_list()
    print(f"{prefix}Matched {expr} -> {matched}")
def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default "match failed" debug hook: print the expression together with the
    type name and text of the exception raised, prefixed with ``*`` on a
    cache hit.
    """
    prefix = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print(f"{prefix}Match {expr} failed, {exc_name} raised: {exc}")
def null_debug_action(*args):
    """No-op debug action: accepts any positional arguments and does nothing,
    so it can be installed to silence debugging output during parsing."""
    return None
389class ParserElement(ABC):
390 """Abstract base level parser element class."""
392 DEFAULT_WHITE_CHARS: str = " \n\t\r"
393 verbose_stacktrace: bool = False
394 _literalStringClass: type = None # type: ignore[assignment]
396 @staticmethod
397 def set_default_whitespace_chars(chars: str) -> None:
398 r"""
399 Overrides the default whitespace chars
401 Example::
403 # default whitespace chars are space, <TAB> and newline
404 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
406 # change to just treat newline as significant
407 ParserElement.set_default_whitespace_chars(" \t")
408 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def']
409 """
410 ParserElement.DEFAULT_WHITE_CHARS = chars
412 # update whitespace all parse expressions defined in this module
413 for expr in _builtin_exprs:
414 if expr.copyDefaultWhiteChars:
415 expr.whiteChars = set(chars)
417 @staticmethod
418 def inline_literals_using(cls: type) -> None:
419 """
420 Set class to be used for inclusion of string literals into a parser.
422 Example::
424 # default literal class used is Literal
425 integer = Word(nums)
426 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
428 date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31']
431 # change to Suppress
432 ParserElement.inline_literals_using(Suppress)
433 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
435 date_str.parse_string("1999/12/31") # -> ['1999', '12', '31']
436 """
437 ParserElement._literalStringClass = cls
439 @classmethod
440 def using_each(cls, seq, **class_kwargs):
441 """
442 Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq.
444 Example::
446 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")
448 .. versionadded:: 3.1.0
449 """
450 yield from (cls(obj, **class_kwargs) for obj in seq)
    class DebugActions(NamedTuple):
        # Optional debug hooks for an expression; None means no hook is set.
        # debug_try: called before a match is attempted
        debug_try: typing.Optional[DebugStartAction]
        # debug_match: called after a successful match
        debug_match: typing.Optional[DebugSuccessAction]
        # debug_fail: called when matching (or a parse action) raises
        debug_fail: typing.Optional[DebugExceptionAction]
457 def __init__(self, savelist: bool = False) -> None:
458 self.parseAction: list[ParseAction] = list()
459 self.failAction: typing.Optional[ParseFailAction] = None
460 self.customName: str = None # type: ignore[assignment]
461 self._defaultName: typing.Optional[str] = None
462 self.resultsName: str = None # type: ignore[assignment]
463 self.saveAsList = savelist
464 self.skipWhitespace = True
465 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
466 self.copyDefaultWhiteChars = True
467 # used when checking for left-recursion
468 self._may_return_empty = False
469 self.keepTabs = False
470 self.ignoreExprs: list[ParserElement] = list()
471 self.debug = False
472 self.streamlined = False
473 # optimize exception handling for subclasses that don't advance parse index
474 self.mayIndexError = True
475 self.errmsg: Union[str, None] = ""
476 # mark results names as modal (report only last) or cumulative (list all)
477 self.modalResults = True
478 # custom debug actions
479 self.debugActions = self.DebugActions(None, None, None)
480 # avoid redundant calls to preParse
481 self.callPreparse = True
482 self.callDuringTry = False
483 self.suppress_warnings_: list[Diagnostics] = []
484 self.show_in_diagram = True
    # Property wrapper exposing the private _may_return_empty attribute
    # under the camelCase name mayReturnEmpty.
    @property
    def mayReturnEmpty(self):
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        # stored as-is; no validation or conversion is applied
        self._may_return_empty = value
494 def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
495 """
496 Suppress warnings emitted for a particular diagnostic on this expression.
498 Example::
500 base = pp.Forward()
501 base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)
503 # statement would normally raise a warning, but is now suppressed
504 print(base.parse_string("x"))
506 """
507 self.suppress_warnings_.append(warning_type)
508 return self
510 def visit_all(self):
511 """General-purpose method to yield all expressions and sub-expressions
512 in a grammar. Typically just for internal use.
513 """
514 to_visit = deque([self])
515 seen = set()
516 while to_visit:
517 cur = to_visit.popleft()
519 # guard against looping forever through recursive grammars
520 if cur in seen:
521 continue
522 seen.add(cur)
524 to_visit.extend(cur.recurse())
525 yield cur
527 def copy(self) -> ParserElement:
528 """
529 Make a copy of this :class:`ParserElement`. Useful for defining
530 different parse actions for the same parsing pattern, using copies of
531 the original parse element.
533 Example::
535 integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
536 integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
537 integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
539 print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))
541 prints::
543 [5120, 100, 655360, 268435456]
545 Equivalent form of ``expr.copy()`` is just ``expr()``::
547 integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
548 """
549 cpy = copy.copy(self)
550 cpy.parseAction = self.parseAction[:]
551 cpy.ignoreExprs = self.ignoreExprs[:]
552 if self.copyDefaultWhiteChars:
553 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
554 return cpy
556 def set_results_name(
557 self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
558 ) -> ParserElement:
559 """
560 Define name for referencing matching tokens as a nested attribute
561 of the returned parse results.
563 Normally, results names are assigned as you would assign keys in a dict:
564 any existing value is overwritten by later values. If it is necessary to
565 keep all values captured for a particular results name, call ``set_results_name``
566 with ``list_all_matches`` = True.
568 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
569 this is so that the client can define a basic element, such as an
570 integer, and reference it in multiple places with different names.
572 You can also set results names using the abbreviated syntax,
573 ``expr("name")`` in place of ``expr.set_results_name("name")``
574 - see :class:`__call__`. If ``list_all_matches`` is required, use
575 ``expr("name*")``.
577 Example::
579 integer = Word(nums)
580 date_str = (integer.set_results_name("year") + '/'
581 + integer.set_results_name("month") + '/'
582 + integer.set_results_name("day"))
584 # equivalent form:
585 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
586 """
587 listAllMatches = listAllMatches or list_all_matches
588 return self._setResultsName(name, listAllMatches)
590 def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
591 if name is None:
592 return self
593 newself = self.copy()
594 if name.endswith("*"):
595 name = name[:-1]
596 list_all_matches = True
597 newself.resultsName = name
598 newself.modalResults = not list_all_matches
599 return newself
601 def set_break(self, break_flag: bool = True) -> ParserElement:
602 """
603 Method to invoke the Python pdb debugger when this element is
604 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
605 disable.
606 """
607 if break_flag:
608 _parseMethod = self._parse
610 def breaker(instring, loc, do_actions=True, callPreParse=True):
611 # this call to breakpoint() is intentional, not a checkin error
612 breakpoint()
613 return _parseMethod(instring, loc, do_actions, callPreParse)
615 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined]
616 self._parse = breaker # type: ignore [method-assign]
617 elif hasattr(self._parse, "_originalParseMethod"):
618 self._parse = self._parse._originalParseMethod # type: ignore [method-assign]
619 return self
621 def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
622 """
623 Define one or more actions to perform when successfully matching parse element definition.
625 Parse actions can be called to perform data conversions, do extra validation,
626 update external data structures, or enhance or replace the parsed tokens.
627 Each parse action ``fn`` is a callable method with 0-3 arguments, called as
628 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
630 - ``s`` = the original string being parsed (see note below)
631 - ``loc`` = the location of the matching substring
632 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object
634 The parsed tokens are passed to the parse action as ParseResults. They can be
635 modified in place using list-style append, extend, and pop operations to update
636 the parsed list elements; and with dictionary-style item set and del operations
637 to add, update, or remove any named results. If the tokens are modified in place,
638 it is not necessary to return them with a return statement.
640 Parse actions can also completely replace the given tokens, with another ``ParseResults``
641 object, or with some entirely different object (common for parse actions that perform data
642 conversions). A convenient way to build a new parse result is to define the values
643 using a dict, and then create the return value using :class:`ParseResults.from_dict`.
645 If None is passed as the ``fn`` parse action, all previously added parse actions for this
646 expression are cleared.
648 Optional keyword arguments:
650 - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during
651 lookaheads and alternate testing. For parse actions that have side effects, it is
652 important to only call the parse action once it is determined that it is being
653 called as part of a successful parse. For parse actions that perform additional
654 validation, then call_during_try should be passed as True, so that the validation
655 code is included in the preliminary "try" parses.
657 Note: the default parsing behavior is to expand tabs in the input string
658 before starting the parsing process. See :class:`parse_string` for more
659 information on parsing strings containing ``<TAB>`` s, and suggested
660 methods to maintain a consistent view of the parsed string, the parse
661 location, and line and column positions within the parsed string.
663 Example::
665 # parse dates in the form YYYY/MM/DD
667 # use parse action to convert toks from str to int at parse time
668 def convert_to_int(toks):
669 return int(toks[0])
671 # use a parse action to verify that the date is a valid date
672 def is_valid_date(instring, loc, toks):
673 from datetime import date
674 year, month, day = toks[::2]
675 try:
676 date(year, month, day)
677 except ValueError:
678 raise ParseException(instring, loc, "invalid date given")
680 integer = Word(nums)
681 date_str = integer + '/' + integer + '/' + integer
683 # add parse actions
684 integer.set_parse_action(convert_to_int)
685 date_str.set_parse_action(is_valid_date)
687 # note that integer fields are now ints, not strings
688 date_str.run_tests('''
689 # successful parse - note that integer fields were converted to ints
690 1999/12/31
692 # fail - invalid date
693 1999/13/31
694 ''')
695 """
696 if list(fns) == [None]:
697 self.parseAction.clear()
698 return self
700 if not all(callable(fn) for fn in fns):
701 raise TypeError("parse actions must be callable")
702 self.parseAction[:] = [_trim_arity(fn) for fn in fns]
703 self.callDuringTry = kwargs.get(
704 "call_during_try", kwargs.get("callDuringTry", False)
705 )
707 return self
709 def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
710 """
711 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.
713 See examples in :class:`copy`.
714 """
715 self.parseAction += [_trim_arity(fn) for fn in fns]
716 self.callDuringTry = self.callDuringTry or kwargs.get(
717 "call_during_try", kwargs.get("callDuringTry", False)
718 )
719 return self
721 def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement:
722 """Add a boolean predicate function to expression's list of parse actions. See
723 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
724 functions passed to ``add_condition`` need to return boolean success/fail of the condition.
726 Optional keyword arguments:
728 - ``message`` = define a custom message to be used in the raised exception
729 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
730 ParseException
731 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
732 default=False
734 Example::
736 integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
737 year_int = integer.copy()
738 year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
739 date_str = year_int + '/' + integer + '/' + integer
741 result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0),
742 (line:1, col:1)
743 """
744 for fn in fns:
745 self.parseAction.append(
746 condition_as_parse_action(
747 fn,
748 message=str(kwargs.get("message")),
749 fatal=bool(kwargs.get("fatal", False)),
750 )
751 )
753 self.callDuringTry = self.callDuringTry or kwargs.get(
754 "call_during_try", kwargs.get("callDuringTry", False)
755 )
756 return self
758 def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
759 """
760 Define action to perform if parsing fails at this expression.
761 Fail acton fn is a callable function that takes the arguments
762 ``fn(s, loc, expr, err)`` where:
764 - ``s`` = string being parsed
765 - ``loc`` = location where expression match was attempted and failed
766 - ``expr`` = the parse expression that failed
767 - ``err`` = the exception thrown
769 The function returns no value. It may throw :class:`ParseFatalException`
770 if it is desired to stop parsing immediately."""
771 self.failAction = fn
772 return self
    def _skipIgnorables(self, instring: str, loc: int) -> int:
        """Advance ``loc`` past every leading match of the ignore expressions.

        Returns ``loc`` unchanged when there are no ignore expressions;
        otherwise returns the location after repeatedly applying each ignore
        expression until none of them matches or no progress is made.
        """
        if not self.ignoreExprs:
            return loc
        exprsFound = True
        ignore_expr_fns = [e._parse for e in self.ignoreExprs]
        last_loc = loc
        while exprsFound:
            exprsFound = False
            for ignore_fn in ignore_expr_fns:
                try:
                    # keep applying this ignore expression until it fails;
                    # the ParseException is what ends the inner loop
                    while 1:
                        loc, dummy = ignore_fn(instring, loc)
                        exprsFound = True
                except ParseException:
                    pass
            # check if all ignore exprs matched but didn't actually advance the parse location
            if loc == last_loc:
                break
            last_loc = loc
        return loc
795 def preParse(self, instring: str, loc: int) -> int:
796 if self.ignoreExprs:
797 loc = self._skipIgnorables(instring, loc)
799 if self.skipWhitespace:
800 instrlen = len(instring)
801 white_chars = self.whiteChars
802 while loc < instrlen and instring[loc] in white_chars:
803 loc += 1
805 return loc
807 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
808 return loc, []
810 def postParse(self, instring, loc, tokenlist):
811 return tokenlist
    # @profile
    def _parseNoCache(
        self, instring, loc, do_actions=True, callPreParse=True
    ) -> tuple[int, ParseResults]:
        """Run one uncached parse attempt of this expression at ``loc``.

        Steps: optional pre-parse (skip ignorables/whitespace), ``parseImpl``,
        ``postParse``, wrapping of the tokens in a ``ParseResults``, then the
        parse actions (only when ``do_actions`` or ``self.callDuringTry`` is
        set). Debug hooks and the fail action are invoked along the way when
        configured.

        Returns ``(loc, ret_tokens)``: the location after the match and the
        resulting ``ParseResults``. Exceptions raised by matching or by parse
        actions propagate; an IndexError from ``parseImpl`` is converted to a
        ParseException positioned at the end of the input.
        """
        debugging = self.debug  # and do_actions)
        len_instring = len(instring)

        # The two branches below perform the same matching steps; the first adds
        # the debug/fail hooks, the second is the hook-free path.
        if debugging or self.failAction:
            # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
            try:
                if callPreParse and self.callPreparse:
                    pre_loc = self.preParse(instring, loc)
                else:
                    pre_loc = loc
                tokens_start = pre_loc
                if self.debugActions.debug_try:
                    self.debugActions.debug_try(instring, tokens_start, self, False)
                if self.mayIndexError or pre_loc >= len_instring:
                    try:
                        loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                    except IndexError:
                        # running off the end of the input counts as a failed match
                        raise ParseException(instring, len_instring, self.errmsg, self)
                else:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except Exception as err:
                # print("Exception raised:", err)
                # NOTE(review): tokens_start is assigned only after preParse
                # returns; if preParse itself raised, the handlers below would
                # reference an unbound name - confirm whether that can happen.
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                if self.failAction:
                    self.failAction(instring, tokens_start, self, err)
                raise
        else:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

        tokens = self.postParse(instring, loc, tokens)

        ret_tokens = ParseResults(
            tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
        )
        if self.parseAction and (do_actions or self.callDuringTry):
            # Same loop in both branches; the debugging branch additionally
            # reports any exception to the debug_fail hook before re-raising.
            if debugging:
                try:
                    for fn in self.parseAction:
                        try:
                            tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                        except IndexError as parse_action_exc:
                            # NOTE(review): other ParseException calls in this
                            # method pass (instring, loc, msg, elem); here the
                            # message is the only (first) argument - confirm
                            # this is intended.
                            exc = ParseException("exception raised in parse action")
                            raise exc from parse_action_exc

                        # a parse action that returns a new object replaces the results
                        if tokens is not None and tokens is not ret_tokens:
                            ret_tokens = ParseResults(
                                tokens,
                                self.resultsName,
                                asList=self.saveAsList
                                and isinstance(tokens, (ParseResults, list)),
                                modal=self.modalResults,
                            )
                except Exception as err:
                    # print "Exception raised in user parse action:", err
                    if self.debugActions.debug_fail:
                        self.debugActions.debug_fail(
                            instring, tokens_start, self, err, False
                        )
                    raise
            else:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
        if debugging:
            # print("Matched", self, "->", ret_tokens.as_list())
            if self.debugActions.debug_match:
                self.debugActions.debug_match(
                    instring, tokens_start, loc, self, ret_tokens, False
                )

        return loc, ret_tokens
915 def try_parse(
916 self,
917 instring: str,
918 loc: int,
919 *,
920 raise_fatal: bool = False,
921 do_actions: bool = False,
922 ) -> int:
923 try:
924 return self._parse(instring, loc, do_actions=do_actions)[0]
925 except ParseFatalException:
926 if raise_fatal:
927 raise
928 raise ParseException(instring, loc, self.errmsg, self)
def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``, without returning tokens.

    :param instring: the string being parsed
    :param loc: index in ``instring`` at which to attempt the match
    :param do_actions: forwarded to :meth:`try_parse`
    :returns: ``True`` if :meth:`try_parse` completes; ``False`` if it raises
        :class:`ParseException` or ``IndexError``
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
        return True
    except (ParseException, IndexError):
        return False
# cache for left-recursion in Forward references
# NOTE(review): recursion_lock presumably guards recursion_memos; its users
# are outside this section of the file - confirm before relying on it.
recursion_lock = RLock()
# Starts out as a plain dict; enable_left_recursion() replaces it with a memo
# object and reset_cache() clears it.  The key/value layout is the one given
# by the annotation (presumably (loc, Forward element, do_actions) ->
# (end loc, tokens or exception) - TODO confirm against Forward.parseImpl).
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}
class _CacheType(typing.Protocol):
    """
    Structural interface for the objects used for packrat and left-recursion
    caching of results and exceptions.
    """

    # sentinel that ``get`` returns for a missing key; callers compare with ``is``
    not_in_cache: bool

    # look up a cached entry (``_parseCache`` passes the lookup key)
    def get(self, *args) -> typing.Any: ...

    # store an entry (``_parseCache`` passes the lookup key and the value)
    def set(self, *args) -> None: ...

    # drop all cached entries
    def clear(self) -> None: ...
class NullCache(dict):
    """
    Do-nothing placeholder used to initialize the ``packrat_cache`` class
    variable.  Once ``enable_packrat()`` is called, it is replaced by a real
    ``_CacheType`` instance.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        """Ignore the arguments and return ``None``."""
        return None

    def set(self, *args) -> None:
        """Ignore the arguments; nothing is stored."""
        return None

    def clear(self) -> None:
        """Do nothing; in particular the underlying dict is not touched."""
        return None
# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
# (a NullCache placeholder until enable_packrat() installs a real cache)
packrat_cache: _CacheType = NullCache()
# held by _parseCache and by the cache enable/disable/reset methods
packrat_cache_lock = RLock()
# [hits, misses] - indexed with HIT = 0 and MISS = 1 in _parseCache
packrat_cache_stats = [0, 0]
979 # this method gets repeatedly called during backtracking with the same arguments -
980 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Memoizing wrapper around ``_parseNoCache`` used while packrat parsing is enabled.

    Results and :class:`ParseBaseException` failures are cached under the key
    ``(self, instring, loc, callPreParse, do_actions)``.  On a hit, the cached
    exception is re-raised or a copy of the cached tokens is returned, and any
    debug actions are called with ``cache_hit=True``.

    :param instring: the string being parsed
    :param loc: location at which the match is attempted
    :param do_actions: forwarded to ``_parseNoCache``
    :param callPreParse: forwarded to ``_parseNoCache``
    :returns: ``(end location, tokens)``
    :raises ParseBaseException: raised by ``_parseNoCache``, or replayed from the cache
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)

        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            # cached layout: (end location, copy of tokens, requested start location)
            cache.set(lookup, (value[0], value[1].copy(), loc))
            return value

        ParserElement.packrat_cache_stats[HIT] += 1
        # Debug actions that predate the ``cache_hit`` keyword raise TypeError
        # when called with it; those calls are deliberately skipped.
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(value, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise value

        value = cast(tuple[int, ParseResults, int], value)
        end_loc, result, start_loc = value[0], value[1].copy(), value[2]
        if self.debug and self.debugActions.debug_match:
            try:
                # Pass (start, end) in the same order as the uncached path and
                # the documented success-action signature; previously the two
                # locations were swapped on cache hits.
                self.debugActions.debug_match(
                    instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return end_loc, result
# Default parse entry point is the uncached implementation; enable_packrat()
# rebinds this to _parseCache and disable_memoization() restores it.
_parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero every packrat statistics counter, and clear
    the left-recursion memo table, all while holding ``packrat_cache_lock``.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        stats = ParserElement.packrat_cache_stats
        # slice-assign so the existing list object is updated in place
        stats[:] = [0] * len(stats)
        ParserElement.recursion_memos.clear()
# class attributes to keep caching status
# set by enable_packrat() / enable_left_recursion(); both cleared by
# disable_memoization()
_packratEnabled = False
_left_recursion_enabled = False
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing, whichever is active, and
    discard their memoized data.

    Calling this when neither Packrat nor Left Recursion is enabled is
    harmless, so it can be used to clear any previous settings before
    activating either one.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.reset_cache()
        ParserElement._left_recursion_enabled = ParserElement._packratEnabled = False
        # go back to the uncached parse entry point
        ParserElement._parse = ParserElement._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports direct as well as
    indirect left-recursion. While parsing, a left-recursive :class:`Forward`
    element is matched repeatedly with a fixed recursion depth that is raised
    step by step until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    Because the recursion search memoizes matches of ``Forward`` elements, parse
    actions may not be re-evaluated during backtracking. Programs whose parse
    actions depend on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - upper bound on the number of
      ``Forward`` elements memoized during matching; ``None`` (the default)
      memoizes all of them. Values that are not greater than zero raise
      ``NotImplementedError``.

    Bounded Recursion parsing is similar but not identical to Packrat parsing,
    so the two cannot be combined. Pass ``force=True`` to disable any
    previous, conflicting settings; otherwise a ``RuntimeError`` is raised if
    Packrat parsing is already enabled.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, i.e. memoization inside the parsing logic.
    Complex grammars often retry a parse at the same string location; with
    packrat enabled such retries return a cached value immediately rather
    than re-running the parsing/validating code. Both successful results
    and parsing exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the size
      of the packrat cache; ``None`` makes the cache unbounded; ``0``
      effectively disables the cache.

    Existing programs whose parse actions have side-effects may break under
    this speedup, which is why packrat parsing is off when pyparsing is first
    imported. To turn it on, a program must call the class method
    :class:`ParserElement.enable_packrat`. For best results, call
    ``enable_packrat()`` right after importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar but not identical to Bounded Recursion parsing,
    so the two cannot be combined. Pass ``force=True`` to disable any
    previous, conflicting settings; otherwise a ``RuntimeError`` is raised if
    Bounded Recursion is already enabled. Calling this while packrat is already
    enabled leaves the existing cache in place.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if not ParserElement._packratEnabled:
            ParserElement._packratEnabled = True
            ParserElement.packrat_cache = (
                _UnboundedCache()
                if cache_size_limit is None
                else _FifoCache(cache_size_limit)
            )
            # route all parsing through the memoizing entry point
            ParserElement._parse = ParserElement._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` according to this parser definition. This is the main entry point
    intended for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the whole input string matches the grammar, set the ``parse_all`` flag to ``True``.
    Ending the grammar with :class:`StringEnd`\\ () has the same effect.

    So that column numbers are reported properly, ``parse_string`` works on a copy of the input string in
    which every tab has been converted to spaces (8 spaces per tab, the ``string.expandtabs`` default).
    When the input contains tabs and a parse action uses its ``loc`` argument to index into the string
    being parsed, a consistent view of the input can be ensured in one of these ways:

    - call ``parse_with_tabs`` on the grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define the parse action with the full ``(s,loc,toks)`` signature and reference the input string
      through the parse action's ``s`` argument, or
    - explicitly expand the tabs in the input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_match_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if must_match_all:
            # skip trailing ignorables, then insist on end-of-string
            end_loc = self.preParse(instring, end_loc)
            (Empty() + StringEnd().set_debug(False))._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

    return tokens
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match is yielded as a
    tuple of the matching tokens, start location, and end location. The optional
    ``max_matches`` argument clips scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    :param instring: the string to scan
    :param max_matches: maximum number of matches to yield
    :param overlap: if true, report overlapping matches
    :param always_skip_whitespace: if true (the default), whitespace and ignorable
        expressions are skipped between match attempts using a separate
        :class:`Empty` pre-parser that shares this expression's ``ignoreExprs``
        and ``whiteChars``; otherwise this expression's own ``preParse`` is used
    :param debug: if true, print a dict of the tokens, start and end of each match
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of the
        two values is used

    Note that the start and end locations are reported relative to the string
    being parsed.  See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    # use the PEP8 name, as parse_string does, instead of the legacy resetCache alias
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here - retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # location after skipping whitespace/ignorables from
                        # the current scan position
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc - step forward to avoid looping
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text using the
    modified tokens returned from a parse action. To use ``transform_string``,
    define a grammar and attach to it a parse action that modifies the returned
    token list. Calling ``transform_string()`` on a target string scans for
    matches and replaces each matched text pattern according to the logic in
    the parse action. The transformed string is returned.

    Note that this sets ``keepTabs`` to ``True`` on this expression.

    :param instring: the string to transform
    :param debug: forwarded to :class:`scan_string`
    :returns: the transformed string

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for tokens, start, end in self.scan_string(instring, debug=debug):
            # copy through the unmatched text between the previous match and this one
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces += tokens.as_list()
                elif isinstance(tokens, Iterable) and not isinstance(
                    tokens, str_type
                ):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)

        # trailing text after the last match
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join([str(item) for item in _flatten(non_empty)])
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens of
    every match of this parse expression. The optional ``max_matches`` argument
    clips searching after 'n' matches are found.

    :param instring: the string to search
    :param max_matches: maximum number of matches to collect
    :param debug: forwarded to :class:`scan_string`
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of the
        two values is used
    :returns: a :class:`ParseResults` holding the tokens of each match

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        scanner = self.scan_string(
            instring, limit, always_skip_whitespace=False, debug=debug
        )
        found = [tokens for tokens, _start, _end in scanner]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, treating matches of this expression as
    separators.

    :param instring: the string to split
    :param maxsplit: limit on the number of splits
    :param include_separators: (default= ``False``) if true, the first token of
        each separator match is yielded between the pieces
    :param includeSeparators: pre-PEP8 spelling of ``include_separators``;
        separators are included if either flag is true

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for tokens, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield tokens[0]
        cursor = end
    # whatever follows the final separator (possibly an empty string)
    yield instring[cursor:]
def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. A string operand
    is converted to a :class:`Literal` by default before being added to a
    :class:`ParserElement`.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    return And([self, rhs]) if isinstance(rhs, ParserElement) else NotImplemented
def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` yields ``SkipTo(expr)("_skipped*") + expr``; a string left
    operand is converted with ``_literalStringClass``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    return lhs + self if isinstance(lhs, ParserElement) else NotImplemented
def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string operand is converted with ``_literalStringClass``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented
def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted with ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    return lhs - self if isinstance(lhs, ParserElement) else NotImplemented
def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, so that ``expr * 3`` can be written in
    place of ``expr + expr + expr``. An expression may also be multiplied by a
    2-integer tuple, similar to ``{min, max}`` multipliers in regular
    expressions. Tuples may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``
    """
    # normalize the Ellipsis spellings to tuples first
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        minElements, optElements = other, 0
    else:
        bounds = tuple(None if item is Ellipsis else item for item in other)
        # pad short tuples with None and ignore anything past two items
        low, high = (bounds + (None, None))[:2]
        if low is None:
            low = 0
        if not isinstance(low, int):
            return NotImplemented
        if high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        if not isinstance(high, int):
            return NotImplemented
        minElements, optElements = low, high - low

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if minElements == optElements == 0:
        return And([])

    def optional_chain(count):
        # builds Opt(self + Opt(self + ... Opt(self))) with ``count`` levels
        chain = Opt(self)
        for _ in range(count - 1):
            chain = Opt(self + chain)
        return chain

    if not minElements:
        # optElements is non-zero here, since (0, 0) returned above
        return optional_chain(optElements)

    required = self if minElements == 1 else And([self] * minElements)
    if not optElements:
        return required
    return required + optional_chain(optElements)
def __rmul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator when the left operand is not a
    :class:`ParserElement`; delegates to ``__mul__`` with the same operand.
    """
    product = self.__mul__(other)
    return product
def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    ``expr | ...`` returns a pending skip created with ``must_skip=True``. A
    string operand is converted with ``_literalStringClass``, except for the
    empty string (see below).

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        if other == "":
            # `expr | ""` is equivalent to `Opt(expr)`
            return Opt(self)
        other = self._literalStringClass(other)

    return (
        MatchFirst([self, other])
        if isinstance(other, ParserElement)
        else NotImplemented
    )
def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted with ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    return lhs | self if isinstance(lhs, ParserElement) else NotImplemented
def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`

    A string operand is converted with ``_literalStringClass``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    return Or([self, rhs]) if isinstance(rhs, ParserElement) else NotImplemented
def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted with ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    return lhs ^ self if isinstance(lhs, ParserElement) else NotImplemented
def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`

    A string operand is converted with ``_literalStringClass``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    return Each([self, rhs]) if isinstance(rhs, ParserElement) else NotImplemented
def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is converted with ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    return lhs & self if isinstance(lhs, ParserElement) else NotImplemented
def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny` wrapping this
    expression.
    """
    negated = NotAny(self)
    return negated
# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence
# (with __iter__ set to None, iter(expr) raises TypeError instead of falling
# back to calling __getitem__ with 0, 1, 2, ...)
__iter__ = None
def __getitem__(self, key):
    """
    ``[]`` indexing notation is a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    Slice notation gives repetition with a stop_on expression:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    :raises TypeError: if more than two index arguments are given

    .. versionchanged:: 3.1.0
       Support for slice notation.
    """

    has_terminator = False
    terminator = NoMatch()
    if isinstance(key, slice):
        # expr[n: end_expr] - the slice start is the count, the stop is the terminator
        key, terminator = key.start, key.stop
        if key is None:
            key = ...
        has_terminator = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[m, n: end_expr] - the slice is the second item of the tuple
        key, terminator = (key[0], key[1].start), key[1].stop
        has_terminator = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_terminator:
        repeated.stopOn(terminator)

    return repeated
def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    A trailing ``'*'`` character on ``name`` causes ``list_all_matches`` to be
    passed as ``True``.

    Omitting ``name`` is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    return self.copy() if name is None else self._setResultsName(name)
def suppress(self) -> ParserElement:
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that its
    output is suppressed; handy for keeping punctuation out of the returned
    output.
    """
    suppressed = Suppress(self)
    return suppressed
def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on whitespace skipping ahead of matching the characters in this
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
        this implementation itself does not read the argument
    :returns: this expression, to allow call chaining
    """
    setattr(self, "skipWhitespace", True)
    return self
def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off whitespace skipping ahead of matching the characters in this
    :class:`ParserElement`'s defined pattern.  Normally this is only used
    internally by the pyparsing module, but some whitespace-sensitive grammars
    may need it.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any);
        this implementation itself does not read the argument
    :returns: this expression, to allow call chaining
    """
    setattr(self, "skipWhitespace", False)
    return self
def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars

    :param chars: the characters to treat as whitespace; stored as a ``set``
    :param copy_defaults: stored as ``copyDefaultWhiteChars``
    :returns: this expression, with whitespace skipping enabled
    """
    whitespace = set(chars)
    self.skipWhitespace = True
    self.whiteChars = whitespace
    self.copyDefaultWhiteChars = copy_defaults
    return self
def parse_with_tabs(self) -> ParserElement:
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before
    parsing the input string, so that tabs are kept as-is.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    :returns: this expression, to allow call chaining
    """
    setattr(self, "keepTabs", True)
    return self
def ignore(self, other: ParserElement) -> ParserElement:
    """
    Register an expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly to register multiple comment or other
    ignorable patterns.

    A string is wrapped in :class:`Suppress`. A :class:`Suppress` expression is
    added as-is unless it is already registered; any other expression is
    copied and wrapped in :class:`Suppress` before being added.

    Example::

        patt = Word(alphas)[...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    ignorable = Suppress(other) if isinstance(other, str_type) else other

    if not isinstance(ignorable, Suppress):
        self.ignoreExprs.append(Suppress(ignorable.copy()))
    elif ignorable not in self.ignoreExprs:
        self.ignoreExprs.append(ignorable)
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching, and
    turn debugging on for this expression. A falsy action is replaced by the
    corresponding default debug action.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
    """
    on_try = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_match = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_fail = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_try, on_match, on_fail)
    self.debug = True
    return self
def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Turn the display of debugging messages during pattern matching on or off.
    Pass ``flag`` as ``True`` to enable, ``False`` to disable.
    Pass ``recurse`` as ``True`` to apply the flag to every expression returned
    by ``visit_all()``.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is what the default debug actions produce - custom debug actions can be
    specified using :class:`set_debug_actions`. Before each attempt
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        for expr in self.visit_all():
            expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling: install the default actions, which also sets self.debug
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self
@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    The name is built by ``_generateDefaultName()`` the first time it is
    requested and then cached in ``_defaultName`` until that cache is reset
    to ``None``.
    """
    if self._defaultName is not None:
        return self._defaultName
    self._defaultName = self._generateDefaultName()
    return self._defaultName
@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The ``default_name`` property calls this method when no default name is
    cached yet and stores the returned string in ``_defaultName``.
    """
def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Assign a custom name to this expression, which makes debugging and
    exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    Returns ``self``.

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # the error message is rebuilt from the (possibly new) display name
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        has_custom_name = name is not None
        self.set_debug(has_custom_name)

    return self
@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined ``customName`` when
    one has been set, otherwise the auto-generated ``default_name``.
    """
    if self.customName is None:
        return self.default_name
    return self.customName

@name.setter
def name(self, new_name) -> None:
    """Assigning to ``name`` is routed through ``set_name()``."""
    self.set_name(new_name)
def __str__(self) -> str:
    """Return this expression's ``name`` (custom name if set, else the default name)."""
    return self.name
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)
def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and return ``self``.

    Also resets the cached default name so that ``default_name`` regenerates
    it on next access.
    """
    self.streamlined = True
    # drop the cached auto-generated name
    self._defaultName = None
    return self
def recurse(self) -> list[ParserElement]:
    """
    Return the expressions to descend into from this one.

    This base implementation returns an empty list; ``_checkRecursion``
    iterates over the result.
    """
    return []
def _checkRecursion(self, parseElementList):
    """
    Walk the expressions returned by ``recurse()``, passing each one a copy of
    ``parseElementList`` extended with this expression.
    """
    # build a new list so the caller's list is not modified
    subRecCheckList = parseElementList[:] + [self]
    for e in self.recurse():
        e._checkRecursion(subRecCheckList)
def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
        Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Emits a ``DeprecationWarning`` and then runs ``_checkRecursion`` starting
    from an empty list. The ``validateTrace`` argument is not used.
    """
    deprecation_message = (
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    )
    # stacklevel=2 attributes the warning to the code calling validate()
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, or a
    filename; a filename is opened with the given ``encoding``, read in full,
    and closed before parsing starts. Parsing requires the whole input to
    match when either ``parse_all`` or ``parseAll`` is true.
    """
    parseAll = parseAll or parse_all

    # try the argument as a file object first; fall back to opening it as a path
    try:
        file_contents = typing.cast(TextIO, file_or_filename).read()
    except AttributeError:
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as f:
            file_contents = f.read()

    try:
        return self.parse_string(file_contents, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object: equal
    - a string: equal when this expression matches the whole string
    - another ``ParserElement``: equal when both have equal instance attributes
    - anything else: not equal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)
def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of a parser against a test string. Handy for simple inline
    microtests of sub-expressions while building up a larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if a
    ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # the whole string is required only if neither spelling of the flag disables it
    parseAll = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output;
      if the callback returns ``None``, the parse results are dumped instead, honoring ``full_dump``
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the PEP 8 arguments with their legacy camelCase synonyms
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a multiline string is split into one stripped test per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines (and blank lines following a comment) are collected
        # and emitted ahead of the next real test
        if (comment_specified and comment.matches(t, False)) or (comments and not t):
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame after the call site is the parse action itself;
                    # tolerate a traceback that ends at the call site
                    next_entry = next(it, None)
                    if next_entry is not None:
                        next_f = next_entry[0]
                        tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # no callback output: dump the results the same way
                        # as when no callback is given
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for this parser and write it out as HTML.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    The expression is streamlined before the diagram is built.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create/overwrite that file with the rendered HTML
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))
# Compatibility synonyms
# Each legacy camelCase name is bound to the result of replaced_by_pep8(), which
# is given the legacy name and the PEP 8-named method defined above.
# NOTE(review): replaced_by_pep8 is imported from .util; presumably it returns a
# wrapper that forwards to the PEP 8 method - confirm there.
# fmt: off
# the first group is re-wrapped with staticmethod()
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# instance-method synonyms
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# plain alias: defaultName is the same property object as default_name
defaultName = default_name
# fmt: on
class _PendingSkip(ParserElement):
    # Internal placeholder that holds the spot where '...' was added to a parser
    # element; once another ParserElement is added to it, the placeholder is
    # replaced with an expression built around a SkipTo.
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render "<anchor> + Empty" and show the Empty as '...'
        placeholder_expr = self.anchor + Empty()
        return str(placeholder_expr).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched but nothing was actually skipped:
            # drop the first token and the results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match: record which expression is missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never combined with a following expression
        # cannot be parsed
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self) -> None:
        # tokens are initialized with savelist=False
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # default name is the concrete class name
        return type(self).__name__
class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self) -> None:
        super().__init__()
        self._may_return_empty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditionally fails at the current location with ``errmsg``
        raise ParseException(instring, loc, self.errmsg, self)
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: when Literal itself is instantiated, pick a
        # subclass with an optimized parseImpl; subclasses are created as-is.
        target_cls = cls
        if cls is Literal:
            literal_text = matchString or match_string
            if not literal_text:
                target_cls = Empty
            elif len(literal_text) == 1:
                target_cls = _SingleCharLiteral
        return super().__new__(target_cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        super().__init__()
        literal_text = matchString or match_string
        self.match = literal_text
        self.matchLen = len(literal_text)
        self.firstMatchChar = literal_text[:1]
        # self.name falls back to the default name, which reads self.match
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # cheap first-character comparison before the full startswith test
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # both arguments are accepted but ignored; the match string is always ""
        super().__init__("")
        self._may_return_empty = True
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeeds without consuming input and without producing tokens
        return loc, []
class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl compares only a single character.
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Store Literal on ParserElement as its _literalStringClass attribute.
# NOTE(review): presumably the class used to convert bare strings into
# expressions - confirm against the uses of _literalStringClass.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.

    :raises ValueError: If the keyword string is empty
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        super().__init__()
        # merge PEP 8 arguments with their legacy camelCase synonyms
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done on upper-cased text, so both the
            # keyword and the identifier characters are upper-cased once here
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that it is neither preceded
        nor followed by one of the identifier characters.

        Returns ``(loc + matchLen, match)`` on success. Otherwise raises
        ``ParseException``; when the keyword text is present but touches an
        identifier character, the message and the reported location point at
        the offending neighbor.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    # (message kept consistent with the case-sensitive branch)
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.

        Only keywords created afterwards without explicit identifier
        characters read the new value; it is looked up in ``__init__``.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        defining_text = matchString or match_string
        # the parent stores the upper-cased form, which parseImpl compares against
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        # the legacy camelCase arguments win when they are truthy
        keyword_text = matchString or match_string
        keyword_chars = identChars or ident_chars
        super().__init__(keyword_text, keyword_chars, caseless=True)
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        # an explicit max_mismatches takes priority over the legacy keyword
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        start = loc
        target = self.match_string
        window_end = start + len(target)

        # only attempt a comparison if enough input remains
        if window_end <= len(instring):
            allowed = self.maxMismatches
            fold_case = self.caseless
            mismatches = []
            pos = 0

            for pos, (src, mat) in enumerate(zip(instring[start:window_end], target)):
                if fold_case:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(pos)
                    if len(mismatches) > allowed:
                        # too many differences - fall through to the failure below
                        break
            else:
                # loop finished without exceeding the mismatch limit
                loc = start + pos + 1
                results = ParseResults([instring[start:loc]])
                results["original"] = target
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums)  # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
       Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # the legacy camelCase keywords take precedence over the
        # snake_case arguments whenever they are truthy
        leading = initChars or init_chars
        trailing = bodyChars or body_chars
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars

        super().__init__()
        if not leading:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        leading_set = set(leading)
        if excluded:
            excluded_set = set(excluded)
            leading_set -= excluded_set
            if trailing:
                trailing = "".join(set(trailing) - excluded_set)

        self.initChars = leading_set
        self.initCharsOrig = "".join(sorted(leading_set))

        if trailing:
            self.bodyChars = set(trailing)
            self.bodyCharsOrig = "".join(sorted(trailing))
        else:
            # no distinct body characters: share the leading set object
            self.bodyChars = leading_set
            self.bodyCharsOrig = self.initCharsOrig

        # recorded before ``exact`` is applied, so it reflects only the
        # caller's explicit ``max`` argument
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        # ``exact`` overrides both bounds
        if exact > 0:
            min = max = exact

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = keyword_mode
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # a regex implementation is only attempted when no blank is among
        # the accepted characters
        if " " in (self.initChars | self.bodyChars):
            return

        if len(self.initChars) == 1:
            lead_fragment = re.escape(self.initCharsOrig)
        else:
            lead_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

        if self.bodyChars == self.initChars:
            # one character class repeated for the whole word
            if max == 0 and self.minLen == 1:
                quantifier = "+"
            elif max == 1:
                quantifier = ""
            elif self.minLen == self.maxLen:
                quantifier = f"{{{self.minLen}}}"
            else:
                upper = "" if self.maxLen == _MAX_INT else self.maxLen
                quantifier = f"{{{self.minLen},{upper}}}"
            pattern = f"{lead_fragment}{quantifier}"
        elif max == 1:
            # only the leading character can ever match
            pattern = lead_fragment
        else:
            # leading class followed by a repeated body class; the body
            # counts are one less because the leading character is separate
            body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
            if max == 0 and self.minLen == 1:
                quantifier = "*"
            elif max == 2:
                quantifier = "?" if min <= 1 else ""
            else:
                lower = min - 1 if min > 0 else ""
                if min == max:
                    quantifier = f"{{{lower}}}"
                else:
                    upper = max - 1 if max > 0 else ""
                    quantifier = f"{{{lower},{upper}}}"
            pattern = f"{lead_fragment}{body_fragment}{quantifier}"

        if self.asKeyword:
            pattern = rf"\b{pattern}\b"
        self.reString = pattern

        try:
            self.re = re.compile(self.reString)
        except re.error:
            # keep the character-by-character parseImpl
            self.re = None  # type: ignore[assignment]
        else:
            self.re_match = self.re.match
            self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def _generateDefaultName(self) -> str:
        """Build the ``W:(...)`` style default name, including any length
        restriction."""
        max_repr_len = 16

        def abbreviate(chars):
            # collapse to range notation, then truncate over-long results
            text = _collapse_string_to_ranges(chars, re_escape=False)
            if len(text) > max_repr_len:
                return text[: max_repr_len - 3] + "..."
            return text

        parts = [abbreviate(self.initChars)]
        if self.initChars != self.bodyChars:
            parts.append(abbreviate(self.bodyChars))
        base = f"W:({', '.join(parts)})"

        shortest, longest = self.minLen, self.maxLen

        # no length specification to add
        if shortest <= 1 and longest == _MAX_INT:
            return base

        if shortest == longest:
            # a single-character word drops the leading "W:"
            return base[2:] if shortest == 1 else f"{base}{{{shortest}}}"

        if longest == _MAX_INT:
            return f"{base}{{{shortest},...}}"

        return f"{base}{{{shortest},{longest}}}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Character-by-character matcher, used when no regex is installed."""
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        loc += 1
        total = len(instring)
        allowed: set[str] = self.bodyChars
        limit = min(begin + self.maxLen, total)
        while loc < limit and instring[loc] in allowed:
            loc += 1

        # True when the character right after the match is a body character
        body_char_follows = loc < total and instring[loc] in allowed

        rejected = (
            # too few characters matched
            loc - begin < self.minLen
            # an explicit max was given, but the word keeps going
            or (self.maxSpecified and body_char_follows)
            # keyword mode: must not touch body characters on either side
            or (
                self.asKeyword
                and (
                    (begin > 0 and instring[begin - 1] in allowed)
                    or body_char_follows
                )
            )
        )
        if rejected:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-backed matcher, installed by ``__init__`` when the pattern
        compiles."""
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()

        raise ParseException(instring, loc, self.errmsg, self)
class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # legacy camelCase keywords take precedence when truthy
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ documentation for an
    explanation of the acceptable patterns and flags.

    ``pattern`` may also be a zero-argument callable returning a string or
    a compiled RE object; it is not called until the regex is first needed.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ) -> None:
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled RE object (stdlib re, regex module, ...)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # as_match takes priority if both options are requested
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    @cached_property
    def re(self) -> re.Pattern:
        """The compiled pattern, built on first access.

        Also fills in the may-return-empty flag. For a pattern compiled
        here the flag is always recomputed; for a precompiled pattern it is
        computed only if it has not been set yet.

        :raises ValueError: if the pattern string cannot be compiled.
        """
        if not self._re:
            if callable(self.pattern):
                # replace self.pattern with the value returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

                # see if we got a compiled RE back instead of a str
                if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                    self._re = cast(re.Pattern[str], self.pattern)
                    self.pattern = self.reString = self._re.pattern

            if not self._re:
                try:
                    self._re = re.compile(self.pattern, self.flags)
                except re.error as err:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from err
                self._may_return_empty = self._re.match("", pos=0) is not None
                return self._re

        # precompiled pattern (given directly or returned by the callable):
        # the flag would otherwise stay None, which silently disables the
        # past-end-of-string guard used by the parseImpl methods
        if self._may_return_empty is None:
            self._may_return_empty = self._re.match("") is not None
        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether the pattern can match the empty string."""
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def _match_at(self, instring, loc):
        """Return the match object for the pattern at ``loc``.

        :raises ParseException: if the pattern does not match at ``loc``.
        """
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)
        return result

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the matched text, with any named groups added as named
        results."""
        result = self._match_at(instring, loc)

        loc = result.end()
        ret = ParseResults(result.group())
        for name, value in result.groupdict().items():
            ret[name] = value

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Variant used for ``as_group_list=True``: return the tuple of
        matched groups."""
        result = self._match_at(instring, loc)
        return result.end(), result.groups()

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Variant used for ``as_match=True``: return the match object
        itself."""
        result = self._match_at(instring, loc)
        return result.end(), result

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"

        :raises TypeError: if this Regex was built with ``as_group_list=True``,
            or with ``as_match=True`` and ``repl`` is a callable.
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # tokens[0] is the match object
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
       ``unquote_results`` is ``False``.

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # escaped whitespace sequences and the characters they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ) -> None:
        super().__init__()
        # merge the legacy camelCase keywords with the snake_case arguments;
        # the boolean options default to True, so they are and-ed
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # accept any proper prefix of the end quote that is not followed
            # by the remainder of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally exclude line breaks
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # NOTE: this fragment must be a plain raw string, not an
                    # f-string - in an f-string "{3}" would be interpolated
                    # as the literal "3" instead of being left as a regex
                    # repetition count
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as err:
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to Regex"
            ) from err

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    @staticmethod
    def _convert_escaped_numerics(s: str) -> str:
        """Convert the text after the backslash of a numeric escape to the
        character it denotes.

        Handles ``"0"`` (null), three octal digits, and ``x``/``u`` followed
        by hex digits; anything else is returned unchanged.
        """
        if s == "0":
            return "\0"
        if s.isdigit() and len(s) == 3:
            return chr(int(s, base=8))
        if s.startswith(("u", "x")):
            return chr(int(s[1:], base=16))
        return s

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``.

        Returns the new location and the matched text, unquoted and
        unescaped when ``unquote_results`` is set.

        :raises ParseException: if no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = None
        if instring[loc] == self.first_quote_char:
            result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                convert_numeric = self._convert_escaped_numerics
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_numeric(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        super().__init__()
        # whitespace is ordinary content here unless explicitly excluded
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # ``exact`` overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Build the ``!W:(...)`` default name, abbreviating long
        exclusion sets."""
        shown = self.notChars
        # the length test is made on the range-collapsed form, but the
        # raw exclusion string is what gets displayed
        if len(_collapse_string_to_ranges(self.notChars)) > 16:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume the longest run of non-excluded characters at ``loc``,
        bounded by ``maxLen``."""
        forbidden = self.notCharsSet
        if instring[loc] in forbidden:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < stop and instring[loc] not in forbidden:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when building the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws

        # every known whitespace character that is NOT being matched is
        # still treated as skippable whitespace
        skippable = "".join(
            ch for ch in self.whiteStrs if ch not in self.matchWhite
        )
        self.set_whitespace_chars(skippable, copy_defaults=True)

        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # ``exact`` overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

    def _generateDefaultName(self) -> str:
        """Concatenate the display names of the matched whitespace
        characters."""
        labels = [White.whiteStrs[ch] for ch in self.matchWhite]
        return "".join(labels)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a run of the configured whitespace characters at
        ``loc``, bounded by ``maxLen``."""
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class PositionToken(Token):
    """Base class for the position-testing tokens defined below it."""

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorables and whitespace until the target column is
        reached, or until a non-space character or the end of input stops
        the scan."""
        target = self.col
        if col(loc, instring) != target:
            if self.ignoreExprs:
                loc = self._skipIgnorables(instring, loc)

            end = len(instring)
            while (
                loc < end
                and instring[loc].isspace()
                and col(loc, instring) != target
            ):
                loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column; fail if
        ``loc`` is already past that column."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        destination = loc + self.col - current
        return destination, instring[loc:destination]
class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # keep a snapshot of the whitespace set before "\n" is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used only to skip non-newline whitespace
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Skip whitespace ahead of ``loc``; newlines are stepped over only
        if ``"\n"`` was in the original whitespace set."""
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)

        if "\n" not in self.orig_whiteChars:
            return pos

        while instring[pos : pos + 1] == "\n":
            pos = self.skipper.preParse(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed, consuming nothing, only when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # a newline is significant here, so it must not be skipped
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc`` or the exact end of the input."""
        size = len(instring)

        # exactly at end of input: succeed with no tokens
        if loc == size:
            return loc + 1, []

        if loc < size and instring[loc] == "\n":
            return loc + 1, "\n"

        # some other character, or already past the end
        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed at position 0, or at the position ``preParse`` reaches
        when started from 0."""
        # see if entire string up to here is just whitespace and ignoreables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)
class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed only when ``loc`` is at or beyond the end of the input."""
        size = len(instring)
        if loc < size:
            raise ParseException(instring, loc, self.errmsg, self)

        # exactly at the end: step one past it; already past: stay put
        next_loc = loc + 1 if loc == size else loc
        return next_loc, []
class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # the legacy keyword is used only if it was changed from its default
        chosen = wordChars if wordChars != printables else word_chars
        super().__init__()
        self.wordChars = set(chosen)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed, consuming nothing, at position 0 or where a word
        character follows a non-word character."""
        word_chars = self.wordChars
        if loc != 0 and (
            instring[loc - 1] in word_chars or instring[loc] not in word_chars
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # The legacy camelCase keyword wins when it was changed from its
        # default; otherwise the PEP 8 argument is used.
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return ``(loc, [])`` if ``loc`` is a word end, else raise
        :class:`ParseException`.

        Any ``loc`` at or past the end of ``instring`` (including an empty
        ``instring``) is accepted. Inside the string, the character at
        ``loc`` must not be a word character and the character before it
        must be one.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                # No character precedes location 0, so no word can end there.
                # Without this check, instring[loc - 1] would wrap around to
                # the *last* character of the string and make the result
                # depend on how the input happens to end.
                loc == 0
                or instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class Tag(Token):
    """
    Meta-element that adds a named entry to the parsed tokens, for
    later inspection in a parse action or in the parsed results.
    The tag value is optional and defaults to `True`.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self._may_return_empty = True
        self.mayIndexError = False
        self.leave_whitespace()

        # what gets stored into the results, and under which key
        self.tag_name = tag_name
        self.tag_value = value

        # the tag itself is written into the results by a parse action
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value in ``tokens`` under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}:{self.tag_name}={self.tag_value!r}"
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens. The contained expressions are kept
    in the ``exprs`` list.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the ``self.exprs`` list.

        ``exprs`` may be a single string, a single :class:`ParserElement`,
        or an iterable (including a generator) of either; strings are
        wrapped using ``self._literalStringClass``. Any other object is
        passed to ``list()``, and stored as a one-element list if that
        raises ``TypeError``.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any bare strings with the literal class, keep the rest as-is
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # clear the stored default name, since the set of expressions changed
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # operate on copies so the originals passed in are left untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # operate on copies so the originals passed in are left untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression on this element and
        on every contained expression, then return ``self``.

        A :class:`Suppress` that is already present in ``self.ignoreExprs``
        is not registered a second time.
        """
        if not isinstance(other, Suppress) or other not in self.ignoreExprs:
            super().ignore(other)
            # propagate the entry the base class just stored
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this element and its contained expressions (once),
        flattening a directly nested expression of the same class.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable(candidate) -> bool:
            # a nested expression may be folded into this one only if it is of
            # the same class and carries no parse actions, results name or
            # debug flag
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if mergeable(other):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if mergeable(other):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates each contained
        expression, then runs ``_checkRecursion`` on this element.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this element whose contained expressions are
        themselves copies.
        """
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and not suppressed) if a contained expression already has a results
        name of its own.
        """
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, list_all_matches)

        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element: once parseImpl has passed one of these, failures of
        # later expressions are raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence from ``exprs_arg``.

        Strings are wrapped with ``self._literalStringClass``; an ``Ellipsis``
        entry is replaced by a ``SkipTo`` of the entry that follows it.
        Raises ``Exception`` if the sequence ends in ``Ellipsis``.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            tmp: list[ParserElement] = []
            # pair each element with its successor; the last element is never
            # a "current" element here, so it is left in place below
            for cur_expr, next_expr in zip(exprs, exprs[1:]):
                if cur_expr is Ellipsis:
                    tmp.append(SkipTo(next_expr)("_skipped*"))
                else:
                    tmp.append(cur_expr)

            exprs[:-1] = tmp

        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling is taken from the first expression, unless
            # that expression is a White
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self._may_return_empty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips into their following expression, run the base
        class streamlining, attach parse actions that record the column for
        any :class:`IndentedBlock`, and recompute ``_may_return_empty``.
        """
        # collapse any _PendingSkip's
        if self.exprs and any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in self.exprs[:-1]
        ):
            deleted_expr_marker = NoMatch()
            # NOTE(review): this loop iterates over a slice copy taken before
            # any marker is stored into self.exprs, so the identity check
            # below does not appear able to succeed - confirm intent.
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    # fold the following expression into the pending skip and
                    # mark its old slot for removal
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # cur is bound as a default argument so the action keeps
                    # referring to this block after the loop variable changes
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating tokens.

        Returns the final location and the combined results. After an
        ``_ErrorStop`` has been passed, a ``ParseBaseException`` or
        ``IndexError`` from a later expression is raised as
        :class:`ParseSyntaxException`.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # if isinstance(e, And._ErrorStop):
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """``+=``: append ``other`` (strings are wrapped as literals) to this
        sequence in place. Returns ``NotImplemented`` for unsupported types.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the stepped slice picks just the first and last characters)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            # may match empty if any alternative can; skips whitespace only if
            # every alternative does
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self._may_return_empty = True

    def streamline(self) -> ParserElement:
        """Run the base class streamlining, then recompute the flags derived
        from the contained expressions.
        """
        super().streamline()
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative at ``loc`` and return the longest match.

        Each alternative is first tried with ``try_parse``; the successful
        ones are then re-parsed with ``_parse`` from longest to shortest.
        If nothing matches, raises the fatal exception with the greatest
        location if any was collected, otherwise the ``ParseException``
        with the greatest location.
        """
        maxExcLoc = -1
        maxException = None
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error discards any non-fatal exception seen so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # non-fatal failures are only tracked while no fatal was seen
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # (end location, results) of the best re-parsed match so far
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    # matched at least as far as the trial parse did
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # greatest location first; ties are broken by the longest
                # string form of the failing element
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if maxExcLoc == parse_start_loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """``^=``: append ``other`` (strings are wrapped as literals) as a new
        alternative in place. Returns ``NotImplemented`` for unsupported types.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled
        and not suppressed) if any alternative is an :class:`And`.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class MatchFirst(ParseExpression):
    """Succeeds when at least one contained :class:`ParserElement` matches.
    When several alternatives could match, the one listed first wins.
    May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
        else:
            # can match empty if any alternative can; skips whitespace only
            # if every alternative does
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then refresh the flags derived from the
        alternatives.
        """
        if not self.streamlined:
            super().streamline()
            if self.exprs:
                self.saveAsList = any(e.saveAsList for e in self.exprs)
                self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
                self.skipWhitespace = all(
                    e.skipWhitespace and not isinstance(e, White)
                    for e in self.exprs
                )
            else:
                self.saveAsList = False
                self._may_return_empty = True
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` is re-raised immediately. If every
        alternative fails, the ``ParseException`` with the greatest
        location is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alternative
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc, furthest_loc = err, err.loc
            except IndexError:
                # treated as a failure located at the end of the input
                end_loc = len(instring)
                if end_loc > furthest_loc:
                    furthest_exc = ParseException(
                        instring, end_loc, alternative.errmsg, self
                    )
                    furthest_loc = end_loc

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """``|=``: append ``other`` (strings are wrapped as literals) as a new
        alternative in place. Returns ``NotImplemented`` for unsupported types.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        alternatives = " | ".join(str(e) for e in self.exprs)
        return f"{{{alternatives}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled
        and not suppressed) if any alternative is an :class:`And`.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)
class Each(ParseExpression):
    """Requires every given :class:`ParserElement` to be found, in any
    order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        # all() over an empty list is True, which covers the no-expressions case
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """``&=``: append ``other`` (strings are wrapped as literals) in place.
        Returns ``NotImplemented`` for unsupported types.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        super().streamline()
        # all() over an empty list is True, which covers the no-expressions case
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match all contained expressions in whatever order they occur.

        The expressions are first tried repeatedly with ``try_parse`` to
        discover a matching order; they are then parsed for real in that
        order and their results concatenated. Raises
        :class:`ParseException` if a required expression was never matched,
        or the collected fatal exception with the greatest location.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            unwrapped_opts = [e.expr for e in self.exprs if isinstance(e, Opt)]
            empty_matchers = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = unwrapped_opts + empty_matchers
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatable = self.multioptionals[:]
        match_order: list[ParserElement] = []

        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while True:
            candidates = pending_required + pending_optional + repeatable
            failed.clear()
            fatals.clear()
            for candidate in candidates:
                try:
                    probe_loc = candidate.try_parse(
                        instring, probe_loc, raise_fatal=True
                    )
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = candidate
                    fatals.append(pfe)
                    failed.append(candidate)
                except ParseException:
                    failed.append(candidate)
                else:
                    # record the original Opt wrapper when there is one
                    match_order.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            # a full pass in which nothing matched ends the search
            if len(failed) == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order.extend(
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        )

        total_results = ParseResults([])
        for matched in match_order:
            loc, results = matched._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        members = " & ".join(str(e) for e in self.exprs)
        return f"{{{members}}}"
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement` that wraps a single
    expression (``self.expr``) in order to combine and post-process its
    parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to a parser element
            expr_str = typing.cast(str, expr)
            literal_class = self._literalStringClass
            if issubclass(literal_class, Token):
                expr = literal_class(expr_str)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_class):
                expr = Literal(expr_str)
            else:
                expr = literal_class(Literal(expr_str))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the wrapped expression's parsing characteristics
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if
        there is none.
        """
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the wrapped expression.

        Raises :class:`ParseException` if no expression is defined. Other
        parse exceptions are re-raised after their missing ``pstr``,
        ``loc`` and ``parser_element`` fields have been filled in.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            # a custom-named, non-Forward wrapper reports its own message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Extends ``leave_whitespace`` from the base class; when
        ``recursive``, also applies it to a copy of the wrapped expression.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Extends ``ignore_whitespace`` from the base class; when
        ``recursive``, also applies it to a copy of the wrapped expression.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable on this element and on the wrapped
        expression; a :class:`Suppress` already registered is skipped.
        """
        already_registered = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_registered:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        super().streamline()
        wrapped = self.expr
        if wrapped is not None:
            wrapped.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        if self.expr is not None:
            self.expr._checkRecursion(parseElementList[:] + [self])

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates the wrapped
        expression, then runs ``_checkRecursion`` on this element.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        if self.expr is not None:
            self.expr.validate(trace[:] + [self])
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        # empty match that only succeeds exactly at column ref_col
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # empty match that only succeeds at a column greater than ref_col
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        """
        :param expr: expression matched for each entry of the block
        :param recursive: if true, an entry may be followed by a nested,
            more deeply indented block of the same expression
        :param grouped: if true, the block's tokens are wrapped in a
            :class:`Group`
        """
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column used to build the optional trailing un-indent match
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Build and run the expression for a block whose indentation is
        the column of the first non-skipped character at or after ``loc``.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        peer_detect_expr = self._Indent(indent_col)

        inner_expr = Empty() + peer_detect_expr + self.expr
        if self._recursive:
            sub_indent = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(sub_indent + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        # optionally wrap the block's tokens in a Group
        body = Group(block) if self._grouped else block
        return (body + Opt(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )
class AtStringStart(ParseElementEnhance):
    """Matches the wrapped expression only when parsing starts at the very
    first character of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")
class AtLineStart(ParseElementEnhance):
    r"""Matches the wrapped expression only when the parse location is in
    column 1, i.e. at the beginning of a line within the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string; it only verifies that the specified parse
    expression matches at the current position. ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Parse the lookahead expression for real, then empty the token list
        # in place: the ParseResults object (and any named results stored on
        # it) survives, only the positional tokens are dropped.
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)
        named_only = lookahead[1]
        del named_only[:]

        # the original location is returned, so no input is consumed
        return loc, named_only
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position. ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        # work on a private, whitespace-sensitive copy of the wrapped expression
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False
        # ``exact`` means the lookbehind distance is taken from the expression
        # itself, so parseImpl tries a single start position
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # discard the matched tokens; only named results are kept
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """
        Check that the wrapped expression matches immediately before ``loc``.

        Returns ``loc`` unchanged together with the lookbehind results.
        Raises ``ParseException`` when the expression does not match within
        the lookbehind window.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        instring_slice = instring[max(0, loc - self.retreat) : loc]
        slice_len = len(instring_slice)
        last_expr: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        # Try successively longer suffixes of the window. The upper bound is
        # the window length itself: going one further would hand a negative
        # start index to _parse, which wraps around to the end of the slice.
        for offset in range(1, slice_len + 1):
            try:
                _, ret = test_expr._parse(instring_slice, slice_len - offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                break
        else:
            # no suffix matched (or the window was empty)
            raise last_expr

        return loc, ret
class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end, matched = self.expr._parse(instring, begin, do_actions, callPreParse=False)
        located = ParseResults([begin, matched, end])
        for key, value in (
            ("locn_start", begin),
            ("value", matched),
            ("locn_end", end),
        ):
            located[key] = value
        # when a results name is set, wrap in a list so that the name is
        # attached to the complete group
        return end, ([located] if self.resultsName else located)
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position. Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list. May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # Set the flag directly instead of calling self.leave_whitespace():
        # that call would propagate the setting into the wrapped expression.
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        unwanted_found = self.expr.can_parse_next(instring, loc, do_actions=do_actions)
        if not unwanted_found:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return "~{{{}}}".format(self.expr)
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation for repetition expressions: matches the wrapped
    expression one or more times, optionally stopping before a sentinel
    expression given as ``stop_on`` (or the legacy keyword ``stopOn``).
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        sentinel = stopOn or stop_on
        self.saveAsList = True
        # the stopOn() method converts a plain string into a literal expression
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the terminating sentinel; returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        # try_parse of the negated sentinel raises when the sentinel is next
        reject_ender = None if self.not_ender is None else self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_one(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_one(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition (or a sentinel hit) ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        diagnostic = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diagnostic not in self.suppress_warnings_
        ):
            for e in [self.expr] + self.expr.recurse():
                if not isinstance(e, ParserElement):
                    continue
                if not e.resultsName:
                    continue
                if diagnostic in e.suppress_warnings_:
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, list_all_matches)
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        return "{{{}}}...".format(self.expr)
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        try:
            result = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # zero repetitions is still a match: stay put, return no tokens
            result = loc, ParseResults([], name=self.resultsName)
        return result

    def _generateDefaultName(self) -> str:
        return "[{}]...".format(self.expr)
class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    ``min`` and ``max`` optionally bound the number of list elements;
    ``min`` must be at least 1 and ``max`` must not be less than ``min``.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example::

        DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters only show up in the results when combining
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # the first element is matched on its own, so the repeated
        # "delimiter + element" part needs one fewer repetition
        fewest_extra = self.min - 1
        most_extra = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            fewest_extra,
            most_extra,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        content_expr = self.content.streamline()
        return f"{content_expr} [{self.raw_delim} {content_expr}]..."
class _NullToken:
    """Placeholder object that is falsy and renders as the empty string."""

    def __bool__(self):
        """Always evaluate as false in a boolean context."""
        return False

    def __str__(self):
        """Render as an empty string."""
        return str()
class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: stay at the same location and substitute the default,
            # if one was given
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return "[" + inner + "]"
# Alias: ``Optional`` refers to the very same class object as ``Opt``.
Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper element that never consumes text itself; it is only used to
        # step over ignorable expressions while scanning
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's ignore
        expressions plus the ``ignore`` argument given to the constructor."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        """Register ``expr`` as ignorable and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """
        Scan forward from ``loc`` one character at a time until the target
        expression matches.

        Returns the location where scanning stopped (or the location after
        the target when ``include`` is set) and the skipped text as results.
        Raises ``ParseException`` if the end of the input is reached without
        a match.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while/else below, so no
                # exception is raised here and the text skipped so far is
                # returned - confirm this is the intended fail_on behavior
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: actions are deferred until the final parse below
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
5582class Forward(ParseElementEnhance):
5583 """
5584 Forward declaration of an expression to be defined later -
5585 used for recursive grammars, such as algebraic infix notation.
5586 When the expression is known, it is assigned to the ``Forward``
5587 variable using the ``'<<'`` operator.
5589 Note: take care when assigning to ``Forward`` not to overlook
5590 precedence of operators.
5592 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::
5594 fwd_expr << a | b | c
5596 will actually be evaluated as::
5598 (fwd_expr << a) | b | c
5600 thereby leaving b and c out as parseable alternatives. It is recommended that you
5601 explicitly group the values inserted into the ``Forward``::
5603 fwd_expr << (a | b | c)
5605 Converting to use the ``'<<='`` operator instead will avoid this problem.
5607 See :class:`ParseResults.pprint` for an example of a recursive
5608 parser created using ``Forward``.
5609 """
5611 def __init__(
5612 self, other: typing.Optional[Union[ParserElement, str]] = None
5613 ) -> None:
5614 self.caller_frame = traceback.extract_stack(limit=2)[0]
5615 super().__init__(other, savelist=False) # type: ignore[arg-type]
5616 self.lshift_line = None
5618 def __lshift__(self, other) -> Forward:
5619 if hasattr(self, "caller_frame"):
5620 del self.caller_frame
5621 if isinstance(other, str_type):
5622 other = self._literalStringClass(other)
5624 if not isinstance(other, ParserElement):
5625 return NotImplemented
5627 self.expr = other
5628 self.streamlined = other.streamlined
5629 self.mayIndexError = self.expr.mayIndexError
5630 self._may_return_empty = self.expr.mayReturnEmpty
5631 self.set_whitespace_chars(
5632 self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
5633 )
5634 self.skipWhitespace = self.expr.skipWhitespace
5635 self.saveAsList = self.expr.saveAsList
5636 self.ignoreExprs.extend(self.expr.ignoreExprs)
5637 self.lshift_line = traceback.extract_stack(limit=2)[-2] # type: ignore[assignment]
5638 return self
5640 def __ilshift__(self, other) -> Forward:
5641 if not isinstance(other, ParserElement):
5642 return NotImplemented
5644 return self << other
5646 def __or__(self, other) -> ParserElement:
5647 caller_line = traceback.extract_stack(limit=2)[-2]
5648 if (
5649 __diag__.warn_on_match_first_with_lshift_operator
5650 and caller_line == self.lshift_line
5651 and Diagnostics.warn_on_match_first_with_lshift_operator
5652 not in self.suppress_warnings_
5653 ):
5654 warnings.warn(
5655 "warn_on_match_first_with_lshift_operator:"
5656 " using '<<' operator with '|' is probably an error, use '<<='",
5657 stacklevel=2,
5658 )
5659 ret = super().__or__(other)
5660 return ret
5662 def __del__(self):
5663 # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
5664 if (
5665 self.expr is None
5666 and __diag__.warn_on_assignment_to_Forward
5667 and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
5668 ):
5669 warnings.warn_explicit(
5670 "warn_on_assignment_to_Forward:"
5671 " Forward defined here but no expression attached later using '<<=' or '<<'",
5672 UserWarning,
5673 filename=self.caller_frame.filename,
5674 lineno=self.caller_frame.lineno,
5675 )
5677 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
5678 if (
5679 self.expr is None
5680 and __diag__.warn_on_parse_using_empty_Forward
5681 and Diagnostics.warn_on_parse_using_empty_Forward
5682 not in self.suppress_warnings_
5683 ):
5684 # walk stack until parse_string, scan_string, search_string, or transform_string is found
5685 parse_fns = (
5686 "parse_string",
5687 "scan_string",
5688 "search_string",
5689 "transform_string",
5690 )
5691 tb = traceback.extract_stack(limit=200)
5692 for i, frm in enumerate(reversed(tb), start=1):
5693 if frm.name in parse_fns:
5694 stacklevel = i + 1
5695 break
5696 else:
5697 stacklevel = 2
5698 warnings.warn(
5699 "warn_on_parse_using_empty_Forward:"
5700 " Forward expression was never assigned a value, will not parse any input",
5701 stacklevel=stacklevel,
5702 )
5703 if not ParserElement._left_recursion_enabled:
5704 return super().parseImpl(instring, loc, do_actions)
5705 # ## Bounded Recursion algorithm ##
5706 # Recursion only needs to be processed at ``Forward`` elements, since they are
5707 # the only ones that can actually refer to themselves. The general idea is
5708 # to handle recursion stepwise: We start at no recursion, then recurse once,
5709 # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
5710 #
5711 # The "trick" here is that each ``Forward`` gets evaluated in two contexts
5712 # - to *match* a specific recursion level, and
5713 # - to *search* the bounded recursion level
5714 # and the two run concurrently. The *search* must *match* each recursion level
5715 # to find the best possible match. This is handled by a memo table, which
5716 # provides the previous match to the next level match attempt.
5717 #
5718 # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
5719 #
5720 # There is a complication since we not only *parse* but also *transform* via
5721 # actions: We do not want to run the actions too often while expanding. Thus,
5722 # we expand using `do_actions=False` and only run `do_actions=True` if the next
5723 # recursion level is acceptable.
5724 with ParserElement.recursion_lock:
5725 memo = ParserElement.recursion_memos
5726 try:
5727 # we are parsing at a specific recursion expansion - use it as-is
5728 prev_loc, prev_result = memo[loc, self, do_actions]
5729 if isinstance(prev_result, Exception):
5730 raise prev_result
5731 return prev_loc, prev_result.copy()
5732 except KeyError:
5733 act_key = (loc, self, True)
5734 peek_key = (loc, self, False)
5735 # we are searching for the best recursion expansion - keep on improving
5736 # both `do_actions` cases must be tracked separately here!
5737 prev_loc, prev_peek = memo[peek_key] = (
5738 loc - 1,
5739 ParseException(
5740 instring, loc, "Forward recursion without base case", self
5741 ),
5742 )
5743 if do_actions:
5744 memo[act_key] = memo[peek_key]
5745 while True:
5746 try:
5747 new_loc, new_peek = super().parseImpl(instring, loc, False)
5748 except ParseException:
5749 # we failed before getting any match - do not hide the error
5750 if isinstance(prev_peek, Exception):
5751 raise
5752 new_loc, new_peek = prev_loc, prev_peek
5753 # the match did not get better: we are done
5754 if new_loc <= prev_loc:
5755 if do_actions:
5756 # replace the match for do_actions=False as well,
5757 # in case the action did backtrack
5758 prev_loc, prev_result = memo[peek_key] = memo[act_key]
5759 del memo[peek_key], memo[act_key]
5760 return prev_loc, copy.copy(prev_result)
5761 del memo[peek_key]
5762 return prev_loc, copy.copy(prev_peek)
5763 # the match did get better: see if we can improve further
5764 if do_actions:
5765 try:
5766 memo[act_key] = super().parseImpl(instring, loc, True)
5767 except ParseException as e:
5768 memo[peek_key] = memo[act_key] = (new_loc, e)
5769 raise
5770 prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek
5772 def leave_whitespace(self, recursive: bool = True) -> ParserElement:
5773 self.skipWhitespace = False
5774 return self
5776 def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
5777 self.skipWhitespace = True
5778 return self
def streamline(self) -> ParserElement:
    """
    Streamline this element once, then delegate to the contained expression.

    The ``streamlined`` flag is set *before* descending into ``self.expr``,
    so a second call on the same element returns immediately instead of
    descending again.

    Returns ``self``.
    """
    if self.streamlined:
        return self

    self.streamlined = True
    inner = self.expr
    if inner is not None:
        inner.streamline()
    return self
def validate(self, validateTrace=None) -> None:
    """
    Deprecated validation hook.

    Always emits a ``DeprecationWarning``.  If this element is not already
    present in ``validateTrace``, the contained expression (when there is
    one) is validated with a copy of the trace extended by this element.
    ``_checkRecursion([])`` is called in every case.

    :param validateTrace: list of elements already visited; ``None`` is
        treated as an empty list.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        DeprecationWarning,
        stacklevel=2,
    )
    trace = [] if validateTrace is None else validateTrace

    if self not in trace:
        # build a new list so the caller's trace is never mutated
        extended_trace = trace[:] + [self]
        inner = self.expr
        if inner is not None:
            inner.validate(extended_trace)
    self._checkRecursion([])
5802 def _generateDefaultName(self) -> str:
5803 # Avoid infinite recursion by setting a temporary _defaultName
5804 save_default_name = self._defaultName
5805 self._defaultName = ": ..."
5807 # Use the string representation of main expression.
5808 try:
5809 if self.expr is not None:
5810 ret_string = str(self.expr)[:1000]
5811 else:
5812 ret_string = "None"
5813 except Exception:
5814 ret_string = "..."
5816 self._defaultName = save_default_name
5817 return f"{type(self).__name__}: {ret_string}"
def copy(self) -> ParserElement:
    """
    Return a copy of this element.

    When no expression has been assigned yet, a new ``Forward`` is created
    and this element is assigned into it with ``<<=``; otherwise the
    inherited ``copy()`` is used.
    """
    if self.expr is None:
        placeholder = Forward()
        placeholder <<= self
        return placeholder
    return super().copy()
def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """
    Attach a results name, warning first if no expression is assigned yet.

    The warning is issued only when all of the following hold: the
    ``warn_name_set_on_empty_Forward`` diagnostic is enabled, that diagnostic
    is not listed in ``self.suppress_warnings_``, and ``self.expr`` is
    ``None``.  The actual work is then delegated to the inherited
    ``_setResultsName``.
    """
    # fmt: off
    should_warn = (
        __diag__.warn_name_set_on_empty_Forward
        and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
        and self.expr is None
    )
    if should_warn:
        message = (
            "warn_name_set_on_empty_Forward:"
            f" setting results name {name!r} on {type(self).__name__} expression"
            " that has no contained expression"
        )
        warnings.warn(message, stacklevel=3)
    # fmt: on

    return super()._setResultsName(name, list_all_matches)
# Compatibility synonyms: the camelCase method names stay available and are
# produced by wrapping the snake_case methods with replaced_by_pep8().
# fmt: off
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
# fmt: on
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but not forwarded to the parent
        # constructor; ``saveAsList`` is always reset to False here.
        super().__init__(expr)  # , savelist)
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # the legacy camelCase keyword wins whenever it is given explicitly
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """
        Register ``other`` as ignorable and return ``self``.

        For an adjacent Combine the base ``ParserElement.ignore`` is called
        directly; otherwise the inherited ``ignore`` is used.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """
        Replace the matched tokens with one joined string.

        A copy of ``tokenlist`` is emptied of its list items and then given
        a single token built from ``tokenlist._asStringList(self.joinString)``.
        The result is wrapped in a list when this element has a results name
        and the copy still carries keys.
        """
        retToks = tokenlist.copy()
        del retToks[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [retToks]
        return retToks
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """
        Nest the matched tokens one level deeper.

        By default the tokens are returned wrapped in a one-element list.
        With ``aslist=True`` they are converted to a plain Python list and
        returned as a ``ParseResults.List``.
        """
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as an item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """
        Add a keyed entry to ``tokenlist`` for every non-empty sub-token.

        The first item of each sub-token is the key (integer keys are
        converted to strings).  The stored value depends on what follows:

        - nothing: the empty string
        - exactly one item that is not a ParseResults: that item
        - otherwise: a copy of the sub-token without its first item, reduced
          to its only element when exactly one remains and the copy carries
          no keys

        Returns the token list, or its ``as_dict()`` form when ``asdict``
        was requested, wrapped in a list if this element has a results name.
        """
        for position, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue

            ikey = tok[0]
            if isinstance(ikey, int):
                ikey = str(ikey).strip()

            if len(tok) == 1:
                tokenlist[ikey] = _ParseResultsWithOffset("", position)
                continue

            if len(tok) == 2 and not isinstance(tok[1], ParseResults):
                tokenlist[ikey] = _ParseResultsWithOffset(tok[1], position)
                continue

            try:
                dictvalue = tok.copy()  # ParseResults(i)
            except Exception:
                exc = TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                )
                raise exc from None

            del dictvalue[0]

            keep_whole = len(dictvalue) != 1 or (
                isinstance(dictvalue, ParseResults) and dictvalue.haskeys()
            )
            stored = dictvalue if keep_whole else dictvalue[0]
            tokenlist[ikey] = _ParseResultsWithOffset(stored, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` (a literal Ellipsis) becomes a pending skip that
        # is resolved once the following expression is known (see __add__
        # and __sub__).  ``savelist`` is accepted but not used here.
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> ParserElement:
        """Combine with ``other``; a pending skip becomes ``Suppress(SkipTo(other)) + other``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        """Combine with ``other``; a pending skip becomes ``Suppress(SkipTo(other)) - other``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Discard every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressed: return ``self`` unchanged."""
        return self
6097# XXX: Example needs to be re-done for updated output
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Before the parse action runs, the wrapper writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``.  After it finishes, the wrapper writes
    ``"<<leaving method-name"`` followed by the returned value, or by the type
    and message of the exception the parse action raised (which is then
    re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']

    .. versionchanged:: 3.1.0
        Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (instring, loc, tokens)
        instring, loc, toks = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the bound instance
            label = f"{type(paArgs[0]).__name__}.{label}"
        sys.stderr.write(
            f">>entering {label}(line: {line(loc, instring)!r}, {loc}, {toks!r})\n"
        )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z
# convenience constants for positional expressions: one shared, named
# instance of each positional element class
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")
# Grammar for the bracket expressions accepted by srange().

# a backslash followed by one punctuation character: keep only that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# a backslash, an optional "0", an "x" or "X" marker, then hex digits.
# The digits are taken as everything after the marker.  Stripping the prefix
# with lstrip() is not safe here: it removes a *set* of characters, so it ate
# leading zero digits (turning "\x00" into "") and left an uppercase "X" in
# place, and int() then failed in both cases.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# a backslash, a "0", then octal digits
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any escaped form above, or one literal character other than "\" and "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "<char>-<char>", grouped so that it can be told apart from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[" + optional "^" (results name "negate") + one or more ranges or single
# characters (results name "body") + "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)
def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, an empty string is
    returned instead of raising.
    """

    def _expanded(p):
        # plain characters pass through unchanged; a parsed range yields
        # every character from its first endpoint to its second, inclusive
        if not isinstance(p, ParseResults):
            yield p
            return
        yield from map(chr, range(ord(p[0]), ord(p[1]) + 1))

    try:
        parsed = _reBracketExpr.parse_string(s)
        return "".join(c for part in parsed.body for c in _expanded(part))
    except Exception:
        return ""
def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action takes its ``__name__`` from ``func``, or from
    the class of ``func`` when ``func`` has no ``__name__`` of its own.

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        return [func(token, *args) for token in t]

    # label the action after the mapped callable
    try:
        label = func.__name__
    except AttributeError:
        label = func.__class__.__name__
    pa.__name__ = label

    return pa
def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable in the *calling* frame that is a
    :class:`ParserElement` without a custom name is given the variable's
    name via ``set_name``.  Does nothing if the interpreter does not provide
    ``sys._getframe``.
    """

    # guard against _getframe not being implemented in the current Python
    getframe_fn = getattr(sys, "_getframe", lambda _: None)
    caller = getframe_fn(1)
    if caller is None:
        return

    # find all locals in the calling frame that are ParserElements
    caller = typing.cast(types.FrameType, caller)
    for name, var in caller.f_locals.items():
        # skip anything that is not a ParserElement or already has a custom name
        if not isinstance(var, ParserElement) or var.customName:
            continue
        var.set_name(name)
# Quoted-string expressions.  Each one is a Regex for the opening quote plus
# the string body, followed by a literal closing quote, wrapped in Combine.

# double-quoted, single line; the body accepts a doubled "" and backslash escapes
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# single-quoted, single line; the body accepts a doubled '' and backslash escapes
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either of the two forms above, tried in the order double then single
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# triple-quoted and single-line forms, combined with "^"; in the single-line
# forms a quote is escaped with a backslash instead of being doubled
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted_string with a leading "u"; a copy is used so the shared
# quoted_string instance is not the one embedded here
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")
# character sets built with srange() from \0x## hex escapes
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (this must stay a comprehension: a plain ``for`` loop at module level would
# bind its loop variable as a new global while vars() is being iterated)
_builtin_exprs: list[ParserElement] = [
    candidate for candidate in vars().values() if isinstance(candidate, ParserElement)
]
# Compatibility synonyms
# fmt: off
# camelCase names for the module-level expressions: plain aliases of the same objects
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
# camelCase names for the helper functions: wrapped with replaced_by_pep8()
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on