Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 44%
2416 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:23 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:23 +0000
1#
2# core.py
3#
4import os
5import typing
6from typing import (
7 NamedTuple,
8 Union,
9 Callable,
10 Any,
11 Generator,
12 Tuple,
13 List,
14 TextIO,
15 Set,
16 Sequence,
17)
18from abc import ABC, abstractmethod
19from enum import Enum
20import string
21import copy
22import warnings
23import re
24import sys
25from collections.abc import Iterable
26import traceback
27import types
28from operator import itemgetter
29from functools import wraps
30from threading import RLock
31from pathlib import Path
33from .util import (
34 _FifoCache,
35 _UnboundedCache,
36 __config_flags,
37 _collapse_string_to_ranges,
38 _escape_regex_range_chars,
39 _bslash,
40 _flatten,
41 LRUMemo as _LRUMemo,
42 UnboundedMemo as _UnboundedMemo,
43)
44from .exceptions import *
45from .actions import *
46from .results import ParseResults, _ParseResultsWithOffset
47from .unicode import pyparsing_unicode
# module-level alias for sys.maxsize
_MAX_INT = sys.maxsize
# the string-like types (str and bytes), as a tuple usable with isinstance()
str_type: Tuple[type, ...] = (str, bytes)
52#
53# Copyright (c) 2003-2022 Paul T. McGuire
54#
55# Permission is hereby granted, free of charge, to any person obtaining
56# a copy of this software and associated documentation files (the
57# "Software"), to deal in the Software without restriction, including
58# without limitation the rights to use, copy, modify, merge, publish,
59# distribute, sublicense, and/or sell copies of the Software, and to
60# permit persons to whom the Software is furnished to do so, subject to
61# the following conditions:
62#
63# The above copyright notice and this permission notice shall be
64# included in all copies or substantial portions of the Software.
65#
66# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
67# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
68# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
69# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
70# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
71# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
72# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
73#
if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """
        Minimal stand-in for :func:`functools.cached_property` on interpreters
        older than Python 3.8.

        The wrapped function is called on first attribute access and its result
        is stored in the instance ``__dict__`` under the function's name, so
        later accesses find the stored value directly and bypass this descriptor.
        """

        def __init__(self, func):
            self._func = func
            # expose the wrapped function's documentation on the descriptor
            self.__doc__ = func.__doc__

        def __get__(self, instance, owner=None):
            # accessed through the class rather than an instance: there is no
            # instance __dict__ to cache into, so return the descriptor itself
            # (the same behavior as functools.cached_property)
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret
class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # label for this group of flags; how it is used is defined by the base class
    # (not visible in this module)
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every name defined so far in this class body that does not start with "_";
    # this must stay below the flag definitions, since it snapshots locals()
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # NOTE(review): presumably the flags that may not be changed at runtime -
    # enforcement would live in the base class; confirm there
    _fixed_names = """
        collect_all_And_tokens
        """.split()
class __diag__(__config_flags):
    """
    Global diagnostic flags, all disabled by default.

    Each public class attribute below is one flag; the flag names mirror the
    members of the ``Diagnostics`` enum defined in this module.
    """

    # label for this group of flags; how it is used is defined by the base class
    # (not visible in this module)
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # every name defined so far in this class body that does not start with "_";
    # this must stay below the flag definitions, since it snapshots locals()
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # flags whose name starts with "warn"
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    # flags whose name starts with "enable_debug"
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Call ``cls.enable`` for every flag listed in ``_warning_names``."""
        for name in cls._warning_names:
            cls.enable(name)
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - flag to enable warnings related to using the
      ``'<<'`` operator together with a :class:`MatchFirst` expression (see the pyparsing
      documentation for the exact condition)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # the member *name* (not the integer value) is what enable_diag() and
    # disable_diag() pass on to the flag storage
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag.

    ``diag_enum`` is a member of :class:`Diagnostics`; the flag is selected by
    the member's name.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag.

    ``diag_enum`` is a member of :class:`Diagnostics`; the flag is selected by
    the member's name.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning flag
    (see :class:`Diagnostics`).
    """
    diag_flags = __diag__
    diag_flags.enable_all_warnings()
# hide abstract class: the imported base is removed from this module's namespace
# now that the concrete flag classes (__compat__, __diag__) have been defined
del __config_flags
192def _should_enable_warnings(
193 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
194) -> bool:
195 enable = bool(warn_env_var)
196 for warn_opt in cmd_line_warn_options:
197 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split(
198 ":"
199 )[:5]
200 if not w_action.lower().startswith("i") and (
201 not (w_message or w_category or w_module) or w_module == "pyparsing"
202 ):
203 enable = True
204 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""):
205 enable = False
206 return enable
# at import time, switch on all diagnostic warnings when the interpreter's
# warning options or the PYPARSINGENABLEALLWARNINGS environment variable ask for it
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()


# build list of single arg builtins, that can be used as parse actions
# (_trim_arity calls these with the tokens argument only)
_single_arg_builtins = {
    sum,
    len,
    sorted,
    reversed,
    list,
    tuple,
    set,
    any,
    all,
    min,
    max,
}

_generatorType = types.GeneratorType
# a parse action takes 0 to 3 trailing arguments of (s, loc, toks)
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# a parse condition has the same call shapes as a parse action, but returns bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: (s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug actions: (instring, loc, expr, cache_hit),
# (instring, startloc, endloc, expr, toks, cache_hit) and
# (instring, loc, expr, exc, cache_hit) respectively
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# character-set constants
alphas = string.ascii_uppercase + string.ascii_lowercase
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# every character of string.printable that is not whitespace
printables = "".join([c for c in string.printable if c not in string.whitespace])

# stack entry captured by the first call to _trim_arity and reused by later calls;
# None until then
_trim_arity_call_line: typing.Optional[traceback.FrameSummary] = None
def _trim_arity(func, max_limit=3):
    """
    decorator to trim function calls to match the arity of the target

    The returned wrapper is always called with the full argument list and
    forwards ``args[limit:]`` to ``func``, starting with ``limit == 0``.
    When the call fails with a TypeError raised at the call line itself
    (rather than inside ``func``), ``limit`` is increased and the call is
    retried, up to ``max_limit`` dropped leading arguments. Once a call has
    succeeded the limit is kept, and later TypeErrors are re-raised unchanged.

    Functions listed in ``_single_arg_builtins`` are not probed: they are
    wrapped so that only the third argument is passed on.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    def extract_tb(tb, limit=0):
        # (filename, lineno) of the last traceback entry within the first
        # `limit` entries, wrapped in a single-item list
        frames = traceback.extract_tb(tb, limit=limit)
        frame_summary = frames[-1]
        return [frame_summary[:2]]

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # the error counts as an arity mismatch only when the last
                    # inspected traceback entry is the call line above, i.e.
                    # the exception was not raised from inside func
                    tb = te.__traceback__
                    trim_arity_type_error = (
                        extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper
def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Function to convert a simple predicate function that returns ``True`` or ``False``
    into a parse action. Can be used in places when a parse action is required
    and :class:`ParserElement.add_condition` cannot be used (such as when adding a condition
    to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - define a custom message to be used in the raised exception;
      when ``None`` (the default), ``"failed user-defined condition"`` is used
    - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise will raise :class:`ParseException`

    Returns a parse action taking ``(s, l, t)`` that raises the selected
    exception when ``fn`` returns a falsy value, and returns ``None`` otherwise.
    """
    msg = message if message is not None else "failed user-defined condition"
    exc_type = ParseFatalException if fatal else ParseException
    # let fn accept anywhere from 0 to 3 of the (s, l, t) arguments
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if not fn(s, l, t):
            raise exc_type(s, l, msg)

    return pa
def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Default debug action run when a match attempt starts.

    Prints the expression, the location with its line and column number, the
    text of that line, and a caret line pointing at the column. The output is
    prefixed with ``*`` when ``cache_hit`` is true.
    """
    prefix = "*" if cache_hit else ""
    row = lineno(loc, instring)
    column = col(loc, instring)
    text = line(loc, instring)
    caret_pad = " " * (col(loc, instring) - 1)
    print(
        f"{prefix}Match {expr} at loc {loc}({row},{column})\n {text}\n {caret_pad}^"
    )
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default debug action run after a successful match.

    Prints the expression and the matched tokens as a list, prefixed with
    ``*`` when ``cache_hit`` is true. The string and location arguments are
    accepted but not used.
    """
    prefix = "*" if cache_hit else ""
    matched = toks.as_list()
    print(f"{prefix}Matched {expr} -> {matched}")
381def _default_exception_debug_action(
382 instring: str,
383 loc: int,
384 expr: "ParserElement",
385 exc: Exception,
386 cache_hit: bool = False,
387):
388 cache_hit_str = "*" if cache_hit else ""
389 print(
390 "{}Match {} failed, {} raised: {}".format(
391 cache_hit_str, expr, type(exc).__name__, exc
392 )
393 )
def null_debug_action(*args):
    """
    'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments and ignores them.
    """
    return None
400class ParserElement(ABC):
401 """Abstract base level parser element class."""
403 DEFAULT_WHITE_CHARS: str = " \n\t\r"
404 verbose_stacktrace: bool = False
405 _literalStringClass: typing.Optional[type] = None
@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of whitespace characters.

    The new value is stored on :class:`ParserElement` and is also pushed to
    every expression in this module's built-in expressions that still copies
    the default whitespace characters.

    Example::

        # default whitespace chars are space, <TAB> and newline
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # propagate to the parse expressions defined in this module
    for builtin in _builtin_exprs:
        if not builtin.copyDefaultWhiteChars:
            continue
        # each expression gets its own set object
        builtin.whiteChars = set(chars)
@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class used to wrap string literals that appear inline in a
    parser definition.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class
class DebugActions(NamedTuple):
    """The three optional debug callbacks of an expression; ``None`` means not set."""

    # called when a match attempt starts
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match
    debug_match: typing.Optional[DebugSuccessAction]
    # called when a match attempt raises
    debug_fail: typing.Optional[DebugExceptionAction]
def __init__(self, savelist: bool = False):
    """
    Set up the state shared by all parser elements.

    ``savelist`` is stored as ``saveAsList``; every other attribute starts
    with the fixed default assigned below.
    """
    # actions and naming
    self.parseAction: List[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    self.customName = None
    self._defaultName = None
    self.resultsName = None
    self.saveAsList = savelist

    # whitespace handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True

    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs: List["ParserElement"] = []
    self.debug = False
    self.streamlined = False

    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""

    # mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True

    # custom debug actions
    self.debugActions = self.DebugActions(None, None, None)

    # avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
    self.suppress_warnings_: List[Diagnostics] = []
def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
    """
    Record that warnings for one diagnostic should be suppressed on this
    expression. Returns ``self`` so that calls can be chained.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parseString("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self
def copy(self) -> "ParserElement":
    """
    Return a copy of this :class:`ParserElement`, so that different parse
    actions can be attached to the same parsing pattern.

    The copy gets its own parse-action list and its own ignore-expression
    list. If this element copies the default whitespace characters, the
    copy's whitespace set is refreshed from the current default.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # give the copy its own lists, so later additions do not affect the original
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate
def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> "ParserElement":
    """
    Define the name under which matching tokens can be referenced as a
    nested attribute of the returned parse results.

    Normally, results names are assigned as you would assign keys in a dict:
    any existing value is overwritten by later values. If it is necessary to
    keep all values captured for a particular results name, call ``set_results_name``
    with ``list_all_matches`` = True (``listAllMatches`` is accepted as a
    keyword-only spelling of the same option).

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    You can also set results names using the abbreviated syntax,
    ``expr("name")`` in place of ``expr.set_results_name("name")``
    - see :class:`__call__`. If ``list_all_matches`` is required, use
    ``expr("name*")``.

    Example::

        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    return self._setResultsName(name, listAllMatches or list_all_matches)
561 def _setResultsName(self, name, listAllMatches=False):
562 if name is None:
563 return self
564 newself = self.copy()
565 if name.endswith("*"):
566 name = name[:-1]
567 listAllMatches = True
568 newself.resultsName = name
569 newself.modalResults = not listAllMatches
570 return newself
def set_break(self, break_flag: bool = True) -> "ParserElement":
    """
    Enable or disable a drop into the Python pdb debugger just before this
    element is parsed. Pass ``break_flag=True`` to enable, ``False`` to
    disable. Returns ``self``.
    """
    if not break_flag:
        # restore the parse method saved by an earlier set_break(True), if any
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped_parse = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb

        # this call to pdb.set_trace() is intentional, not a checkin error
        pdb.set_trace()
        return wrapped_parse(instring, loc, doActions, callPreParse)

    # remember the wrapped method so that it can be restored later
    breaker._originalParseMethod = wrapped_parse
    self._parse = breaker
    return self
def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Replace this expression's parse actions with ``fns``; the actions run
    when the expression has matched successfully.

    Parse actions can be called to perform data conversions, do extra validation,
    update external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable method with 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - s = the original string being parsed (see note below)
    - loc = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The parsed tokens are passed to the parse action as ParseResults. They can be
    modified in place using list-style append, extend, and pop operations to update
    the parsed list elements; and with dictionary-style item set and del operations
    to add, update, or remove any named results. If the tokens are modified in place,
    it is not necessary to return them with a return statement.

    Parse actions can also completely replace the given tokens, with another ``ParseResults``
    object, or with some entirely different object (common for parse actions that perform data
    conversions). A convenient way to build a new parse result is to define the values
    using a dict, and then create the return value using :class:`ParseResults.from_dict`.

    If None is passed as the ``fn`` parse action, all previously added parse actions for this
    expression are cleared.

    Raises ``TypeError`` if any of ``fns`` is not callable.

    Optional keyword arguments:

    - call_during_try = (default= ``False``) indicate if parse action should be run during
      lookaheads and alternate testing. For parse actions that have side effects, it is
      important to only call the parse action once it is determined that it is being
      called as part of a successful parse. For parse actions that perform additional
      validation, then call_during_try should be passed as True, so that the validation
      code is included in the preliminary "try" parses.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None clears the actions and leaves callDuringTry untouched
    if list(fns) == [None]:
        self.parseAction = []
        return self

    if any(not callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")

    self.parseAction = [_trim_arity(fn) for fn in fns]
    # the snake_case keyword takes precedence over the camelCase spelling
    camel_case_value = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", camel_case_value)
    return self
def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Append one or more parse actions to this expression's existing list of
    parse actions. See :class:`set_parse_action`.

    ``call_during_try`` (or ``callDuringTry``) can only switch the setting
    on; a value already set on the expression is kept.

    See examples in :class:`copy`.
    """
    # wrap all actions first, so nothing is added if wrapping one of them fails
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction += wrapped
    camel_case_value = kwargs.get("callDuringTry", False)
    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", camel_case_value
    )
    return self
def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Append boolean predicate functions to this expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - message = define a custom message to be used in the raised exception
    - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - call_during_try = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    message = kwargs.get("message")
    fatal = kwargs.get("fatal", False)
    for predicate in fns:
        action = condition_as_parse_action(predicate, message=message, fatal=fatal)
        self.parseAction.append(action)

    camel_case_value = kwargs.get("callDuringTry", False)
    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", camel_case_value
    )
    return self
def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
    """
    Define the action to perform if parsing fails at this expression.
    Fail action ``fn`` is a callable function that takes the arguments
    ``fn(s, loc, expr, err)`` where:

    - s = string being parsed
    - loc = location where expression match was attempted and failed
    - expr = the parse expression that failed
    - err = the exception thrown

    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns ``self`` so that calls can be chained."""
    fail_handler = fn
    self.failAction = fail_handler
    return self
def _skipIgnorables(self, instring, loc):
    """
    Advance ``loc`` past every match of the expressions in ``ignoreExprs``.

    Each ignorable is applied repeatedly until it raises ParseException; the
    whole list is re-scanned until a full pass matches nothing. Returns the
    resulting location.
    """
    skipped_any = True
    while skipped_any:
        skipped_any = False
        for ignorable in self.ignoreExprs:
            try:
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    skipped_any = True
            except ParseException:
                # this ignorable no longer matches here; move on to the next
                continue
    return loc
def preParse(self, instring, loc):
    """
    Return the location at which matching should start: ``loc`` moved past
    any ignorable expressions and then, if ``skipWhitespace`` is set, past
    any leading characters contained in ``whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    end = len(instring)
    whitespace = self.whiteChars
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
def parseImpl(self, instring, loc, doActions=True):
    """
    Base implementation: match nothing and consume nothing.

    Returns ``loc`` unchanged together with a new empty token list.
    """
    no_tokens = []
    return loc, no_tokens
def postParse(self, instring, loc, tokenlist):
    """
    Base implementation: hand back ``tokenlist`` unchanged (the same object).
    """
    unchanged = tokenlist
    return unchanged
# @profile
def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Match this expression against ``instring`` at ``loc`` without consulting
    any cache.

    Steps: optionally run ``preParse``, call ``parseImpl``, pass the tokens
    through ``postParse``, wrap them in a :class:`ParseResults`, then run the
    parse actions (only when ``doActions`` or ``callDuringTry`` is true). A
    parse action's return value replaces the results unless it is ``None`` or
    the results object itself.

    When ``debug`` is set or a fail action is defined, the debug/fail
    callbacks are invoked around the match attempt.

    Returns ``(end_location, results)``. An ``IndexError`` raised by
    ``parseImpl`` or by a parse action is converted to :class:`ParseException`;
    other exceptions propagate.
    """
    debugging = self.debug  # and doActions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # make sure the failure callbacks below always have a location to
        # report, even when preParse itself raises before tokens_start is set
        tokens_start = loc
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # fast path: same matching steps, without any callbacks
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns something new replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a parse action that returns something new replaces the results
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens
def try_parse(self, instring: str, loc: int, raise_fatal: bool = False) -> int:
    """
    Attempt a match at ``loc`` with parse actions disabled and return the
    location just past the match.

    A :class:`ParseFatalException` is re-raised as is when ``raise_fatal`` is
    true; otherwise it is replaced by a :class:`ParseException` at ``loc``
    carrying this expression's error message.
    """
    try:
        return self._parse(instring, loc, doActions=False)[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
def can_parse_next(self, instring: str, loc: int) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    Returns ``False`` when the trial parse raises :class:`ParseException` or
    ``IndexError``, ``True`` otherwise.
    """
    try:
        self.try_parse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
# cache for left-recursion in Forward references
# (class-level state, cleared by reset_cache())
recursion_lock = RLock()
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
] = {}

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = (
    {}
)  # this is set later by enabled_packrat(); this is here so that reset_cache() doesn't fail
# held by _parseCache for the whole lookup/parse/store sequence
packrat_cache_lock = RLock()
# [hits, misses], indexed by the HIT and MISS constants in _parseCache
packrat_cache_stats = [0, 0]
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Cached variant of ``_parseNoCache``.

    The cache key is ``(self, instring, loc, callPreParse, doActions)``. On a
    miss the expression is parsed; the result (or a traceback-free copy of the
    raised :class:`ParseBaseException`) is stored before being returned or
    re-raised. On a hit a copy of the stored results is returned, or the
    stored exception is raised.

    When ``debug`` is set, the debug actions are invoked on cache hits with
    ``cache_hit=True``. The start location reported to ``debug_match`` on a
    hit is the ``loc`` the parse was requested at.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # stored as (end location, copy of results, requested start location)
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            # NOTE(review): the TypeError guards below presumably tolerate
            # debug actions that do not accept the cache_hit keyword - confirm
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True
                        )
                    except TypeError:
                        pass
                raise value

            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # start before end, the same argument order _parseNoCache uses
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True
                    )
                except TypeError:
                    pass

            return end_loc, result
# Active parse entry point.  Starts out as the uncached implementation;
# enable_packrat() rebinds it to _parseCache and disable_memoization() restores it.
957 _parse = _parseNoCache
@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache and the left-recursion memo table, and set every
    entry of ``packrat_cache_stats`` back to zero (in place).
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()
# Mode flags: set by enable_packrat() / enable_left_recursion() and cleared by
# disable_memoization().  Each enable_* method refuses to run (unless forced)
# while the other flag is set.
967 _packratEnabled = False
968 _left_recursion_enabled = False
@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing and discard their memoized data.

    Calling this when neither mode is active is harmless, so it can safely be
    used to clear any earlier settings before enabling Packrat or Left
    Recursion.
    """
    # drop cached data first, then clear both mode flags
    ParserElement.reset_cache()
    ParserElement._left_recursion_enabled = False
    ParserElement._packratEnabled = False
    # route parsing back through the uncached implementation
    ParserElement._parse = ParserElement._parseNoCache
@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Enables "bounded recursion" parsing, which supports both direct and indirect
    left-recursion. While parsing, left-recursive :class:`Forward` elements are
    matched repeatedly with a fixed recursion depth that is increased step by
    step until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-run during backtracking. Programs whose parse actions
    depend on a strict ordering of side-effects may break.

    Parameters:

    - cache_size_limit - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; ``None`` memoizes all of them.
      Values that are not positive raise ``NotImplementedError``.
    - force - (default=``False``) - disable any previous memoization settings
      first instead of raising on a conflict.

    Bounded Recursion parsing is similar, but not identical, to Packrat parsing,
    so the two cannot be active together: ``RuntimeError`` is raised if Packrat
    is enabled and ``force`` is not set.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    # choose the memo implementation before touching any class state
    if cache_size_limit is None:
        memo_table = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo_table = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError("Memo size of %s" % cache_size_limit)

    ParserElement.recursion_memos = memo_table
    ParserElement._left_recursion_enabled = True
@staticmethod
def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Parse attempts that are repeated at the same string location (common in
    complex grammars) return a cached value straight away instead of
    re-running the parsing/validating code. Both successful results and
    parsing exceptions are memoized.

    Parameters:

    - cache_size_limit - (default= ``128``) - an integer value limits the size
      of the packrat cache; ``None`` makes the cache unbounded; ``0``
      effectively disables the cache.
    - force - (default= ``False``) - disable any previous memoization settings
      first instead of raising on a conflict.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is first
    imported. To turn it on, call :class:`ParserElement.enable_packrat`; for
    best results do so immediately after importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar, but not identical, to Bounded Recursion parsing,
    so the two cannot be active together: ``RuntimeError`` is raised if Bounded
    Recursion is enabled and ``force`` is not set. If packrat is already
    enabled, the call leaves the existing cache in place.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if ParserElement._packratEnabled:
        # already active: keep the current cache and parse hook
        return

    ParserElement._packratEnabled = True
    ParserElement.packrat_cache = (
        _UnboundedCache() if cache_size_limit is None else _FifoCache(cache_size_limit)
    )
    ParserElement._parse = ParserElement._parseCache
def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string according to this parser definition. This is intended to be
    the primary interface for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    When the whole input must match the grammar, set ``parse_all`` to ``True``;
    this is equivalent to ending the grammar with :class:`StringEnd`().

    So that column numbers are reported properly, ``parse_string`` works on a
    copy of the input in which tabs are converted to spaces (8 spaces per tab,
    the ``string.expandtabs`` default). If the input contains tabs and parse
    actions use the ``loc`` argument to index into the parsed string, get a
    consistent view of the input by doing one of the following:

    - call ``parse_with_tabs`` on the grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define the parse action with the full ``(s,loc,toks)`` signature and use its ``s`` argument
      to reference the input string, or
    - expand the tabs in the input string yourself before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_match_all = parse_all or parseAll

    # every top-level parse starts from empty caches and streamlined expressions
    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()

    text = instring if self.keepTabs else instring.expandtabs()
    try:
        end_loc, tokens = self._parse(text, 0)
        if must_match_all:
            # skip trailing ignorables, then insist on end-of-string
            end_loc = self.preParse(text, end_loc)
            end_check = Empty() + StringEnd()
            end_check._parse(text, end_loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise from here so the pyparsing-internal stack trace is dropped
        raise exc.with_traceback(None)
    return tokens
def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for matches of this expression, yielding
    ``(tokens, start, end)`` for each one. The optional ``max_matches``
    argument stops the scan after that many matches. When ``overlap`` is
    given, overlapping matches are reported as well. With ``debug`` set, a
    dict describing each match is printed before it is yielded.

    The start and end locations are relative to the string being parsed. See
    :class:`parse_string` for more information on parsing strings with
    embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    match_limit = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    text_len = len(instring)
    pos = 0
    skip_ignorables = self.preParse
    parse_at = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while pos <= text_len and found < match_limit:
            try:
                start = skip_ignorables(instring, pos)
                end, tokens = parse_at(instring, start, callPreParse=False)
            except ParseException:
                # no match here: retry one character further on
                pos = start + 1
                continue

            if not end > pos:
                # the match did not advance past the scan position; do not report it
                pos = start + 1
                continue

            found += 1
            if debug:
                print(
                    {
                        "tokens": tokens.asList(),
                        "start": start,
                        "end": end,
                    }
                )
            yield tokens, start, end

            if not overlap:
                pos = end
            elif skip_ignorables(instring, pos) > pos:
                pos = end
            else:
                pos += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise from here so the pyparsing-internal stack trace is dropped
        raise exc.with_traceback(None)
def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text using the
    (possibly modified) tokens returned from a parse action. To use
    ``transform_string``, define a grammar and attach a parse action that
    changes the returned token list. ``transform_string()`` then scans the
    target string for matches, substitutes the matched text according to the
    parse action, and returns the resulting string.

    Note that this sets ``keepTabs`` on the expression.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for tokens, start, end in self.scan_string(instring, debug=debug):
            # untouched text between the previous match and this one
            pieces.append(instring[prev_end:start])
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces += tokens.as_list()
                elif isinstance(tokens, Iterable) and not isinstance(
                    tokens, str_type
                ):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            prev_end = end
        pieces.append(instring[prev_end:])
        kept = [piece for piece in pieces if piece]
        return "".join([str(part) for part in _flatten(kept)])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise from here so the pyparsing-internal stack trace is dropped
        raise exc.with_traceback(None)
def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens of
    every match of this expression into one :class:`ParseResults`. The optional
    ``max_matches`` argument stops the search after that many matches.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        hits = [
            tokens for tokens, _start, _end in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(hits)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raise from here so the pyparsing-internal stack trace is dropped
        raise exc.with_traceback(None)
def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, using this expression as the separator.
    The optional ``maxsplit`` argument limits the number of splits, and the
    optional ``include_separators`` argument (default= ``False``) controls
    whether the matched separator text is yielded between the pieces.
    ``includeSeparators`` is the pre-PEP8 spelling of the same flag.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for tokens, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield tokens[0]
        cursor = end
    # whatever follows the final separator (possibly an empty string)
    yield instring[cursor:]
def __add__(self, other) -> "ParserElement":
    """
    Implementation of the ``+`` operator - returns :class:`And`. Strings added to a
    :class:`ParserElement` are converted to :class:`Literal`s by default.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.

        Literal('start') + ... + Literal('end')

    is equivalent to:

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    The skipped text is returned under the results name '_skipped'. To support
    several skips in the same parser, the value returned is a list of all
    skipped text.

    Raises ``TypeError`` if ``other`` is neither a string, ``...`` nor a
    :class:`ParserElement`.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
def __radd__(self, other) -> "ParserElement":
    """
    Implementation of the ``+`` operator when the left operand is not a
    :class:`ParserElement`. A leading ``...`` becomes a :class:`SkipTo` this
    expression (named ``"_skipped*"``) followed by this expression; a string is
    converted with ``_literalStringClass``. Anything else that is not a
    :class:`ParserElement` raises ``TypeError``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
def __sub__(self, other) -> "ParserElement":
    """
    Implementation of the ``-`` operator, returns :class:`And` with an error stop
    placed between the two operands. Strings are converted with
    ``_literalStringClass``; any other non-:class:`ParserElement` operand raises
    ``TypeError``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of the ``-`` operator when the left operand is not a
    :class:`ParserElement`. Strings are converted with ``_literalStringClass``;
    any other non-:class:`ParserElement` operand raises ``TypeError``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
def __mul__(self, other) -> "ParserElement":
    """
    Implementation of the ``*`` operator, allowing ``expr * 3`` in place of
    ``expr + expr + expr``. An expression may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` (or ``...``) as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``TypeError`` for multipliers that are not ints or tuples of
    ints/``None``, and ``ValueError`` for a negative count or a maximum that
    is smaller than the minimum.
    """
    # rewrite the Ellipsis spellings into their tuple equivalents
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        required, optional = other, 0
    elif not isinstance(other, tuple):
        raise TypeError(
            "cannot multiply ParserElement and {} objects".format(
                type(other).__name__
            )
        )
    else:
        # pad/clip to exactly (lower, upper), treating ... as None
        bounds = tuple(None if item is Ellipsis else item for item in other)
        lower, upper = (bounds + (None, None))[:2]
        if lower is None:
            lower = 0

        if isinstance(lower, int) and upper is None:
            # open-ended repetition
            if lower == 0:
                return ZeroOrMore(self)
            if lower == 1:
                return OneOrMore(self)
            return self * lower + ZeroOrMore(self)

        if isinstance(lower, int) and isinstance(upper, int):
            required, optional = lower, upper - lower
        else:
            raise TypeError(
                "cannot multiply ParserElement and ({}) objects".format(
                    ",".join(type(item).__name__ for item in (lower, upper))
                )
            )

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    if not optional:
        return self if required == 1 else And([self] * required)

    def nested_optionals(count):
        # Opt(self + Opt(self + ... Opt(self))) with `count` levels
        if count > 1:
            return Opt(self + nested_optionals(count - 1))
        return Opt(self)

    if not required:
        return nested_optionals(optional)
    mandatory = self if required == 1 else And([self] * required)
    return mandatory + nested_optionals(optional)
def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` when the left operand is not a
    :class:`ParserElement`; delegates to ``__mul__`` with the same multiplier.
    """
    product = self.__mul__(other)
    return product
def __or__(self, other) -> "ParserElement":
    """
    Implementation of the ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a pending skip (with ``must_skip=True``). Strings
    are converted with ``_literalStringClass``; any other
    non-:class:`ParserElement` operand raises ``TypeError``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
def __ror__(self, other) -> "ParserElement":
    """
    Implementation of the ``|`` operator when the left operand is not a
    :class:`ParserElement`. Strings are converted with ``_literalStringClass``;
    any other non-:class:`ParserElement` operand raises ``TypeError``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
def __xor__(self, other) -> "ParserElement":
    """
    Implementation of the ``^`` operator - returns :class:`Or`. Strings are
    converted with ``_literalStringClass``; any other
    non-:class:`ParserElement` operand raises ``TypeError``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of the ``^`` operator when the left operand is not a
    :class:`ParserElement`. Strings are converted with ``_literalStringClass``;
    any other non-:class:`ParserElement` operand raises ``TypeError``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
def __and__(self, other) -> "ParserElement":
    """
    Implementation of the ``&`` operator - returns :class:`Each`. Strings are
    converted with ``_literalStringClass``; any other
    non-:class:`ParserElement` operand raises ``TypeError``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(rhs).__name__
        )
    )
def __rand__(self, other) -> "ParserElement":
    """
    Implementation of the ``&`` operator when the left operand is not a
    :class:`ParserElement`. Strings are converted with ``_literalStringClass``;
    any other non-:class:`ParserElement` operand raises ``TypeError``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(lhs).__name__
        )
    )
def __invert__(self) -> "ParserElement":
    """
    Implementation of the ``~`` operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated
# Setting __iter__ to None makes iter(expr) raise TypeError instead of falling back
# to the legacy sequence protocol, which would repeatedly call __getitem__ (used in
# this class for repetition syntax such as expr[1, ...]).
1619 # disable __iter__ to override legacy use of sequential access to __getitem__ to
1620 # iterate over a sequence
1621 __iter__ = None
def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr`` s exist in the input stream. If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    Raises ``TypeError`` when more than two index arguments are given.
    """

    # a string key is a single argument, not a sequence of characters
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # single non-iterable argument: use it as both bounds
        key = (key, key)

    arg_count = len(key)
    if arg_count > 2:
        overflow = "... [{}]".format(arg_count) if arg_count > 5 else ""
        raise TypeError(
            "only 1 or 2 index arguments supported ({}{})".format(key[:5], overflow)
        )

    # clip to 2 elements
    return self * tuple(key[:2])
def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted (or ``None``), same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    # only None selects the copy() path; an empty string is still passed on as a name
    if name is None:
        return self.copy()
    return self._setResultsName(name)
def suppress(self) -> "ParserElement":
    """
    Wrap this :class:`ParserElement` in :class:`Suppress` so its output is left
    out of the results; handy for keeping punctuation from cluttering up the
    returned output.
    """
    suppressed = Suppress(self)
    return suppressed
def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turns on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern. Returns ``self``.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
      this implementation has no child elements and does not consult the flag
    """
    self.skipWhitespace = True
    return self
def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turns off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern. Normally this is only used
    internally by the pyparsing module, but some whitespace-sensitive grammars
    may need it. Returns ``self``.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any);
      this implementation has no child elements and does not consult the flag
    """
    self.skipWhitespace = False
    return self
def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Overrides the default whitespace chars for this expression.

    Whitespace skipping is switched on, ``chars`` is stored as a set in
    ``whiteChars`` and ``copy_defaults`` is stored in
    ``copyDefaultWhiteChars``. Returns ``self``.
    """
    self.skipWhitespace = True
    whitespace = set(chars)
    self.whiteChars = whitespace
    self.copyDefaultWhiteChars = copy_defaults
    return self
def parse_with_tabs(self) -> "ParserElement":
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before
    parsing the input string, by setting ``keepTabs``. Must be called before
    ``parse_string`` when the input grammar contains elements that match
    ``<TAB>`` characters. Returns ``self``.
    """
    self.keepTabs = True
    return self
def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in :class:`Suppress`. A :class:`Suppress` expression is
    added to ``ignoreExprs`` only if it is not already present; any other
    expression is copied, wrapped in :class:`Suppress`, and always appended.
    Returns ``self``.

    Example::

        patt = Word(alphas)[1, ...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if isinstance(other, Suppress):
        # avoid registering the same suppressed expression twice
        if other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
    else:
        self.ignoreExprs.append(Suppress(other.copy()))
    return self
def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching, and
    turn debugging on for this expression. A falsy action is replaced by the
    corresponding default debug action. Returns ``self``.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
    """
    on_try = start_action or _default_start_debug_action
    on_match = success_action or _default_success_debug_action
    on_fail = exception_action or _default_exception_debug_action
    self.debugActions = self.DebugActions(on_try, on_match, on_fail)
    self.debug = True
    return self
def set_debug(self, flag: bool = True) -> "ParserElement":
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable. Returns ``self``.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is produced by the default debug actions - custom debug
    actions can be specified using :class:`set_debug_actions`. Before each
    attempt to match the ``wd`` expression, the message
    ``"Match <exprname> at loc <n>(<line>,<col>)"`` is shown. If the parse
    succeeds a ``"Matched"`` message follows, otherwise an
    ``"Exception raised"`` message. Also note the use of :class:`set_name` to
    give the expression a human-readable name, which makes debugging and
    exception messages easier to understand - for instance, the default name
    created for the :class:`Word` expression without calling ``set_name`` is
    ``"W:(A-Za-z)"``.
    """
    if not flag:
        self.debug = False
        return self

    # enabling installs the default actions, which also switches debug on
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self
@property
def default_name(self) -> str:
    """
    Auto-generated name of this expression. It is built by
    ``_generateDefaultName()`` on first access and cached in ``_defaultName``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached
@abstractmethod
def _generateDefaultName(self):
    """
    Abstract hook: every child class has to implement this to build the value
    that ``default_name`` reports.
    """
def set_name(self, name: str) -> "ParserElement":
    """
    Give this expression a name, which makes debugging and exception messages clearer.
    The error message becomes ``"Expected "`` followed by the expression's
    name, and debugging is switched on as well when
    ``__diag__.enable_debug_on_named_expressions`` is set. Returns ``self``.

    Example::

        Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
        Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # read the name back through the property so a None name falls back to the default
    expected = "Expected " + self.name
    self.errmsg = expected
    if __diag__.enable_debug_on_named_expressions:
        self.set_debug()
    return self
@property
def name(self) -> str:
    """
    Name of this expression: the user-defined name when one has been set,
    otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is not None:
        return custom
    return self.default_name
def __str__(self) -> str:
    """Return the expression's ``name``."""
    label = self.name
    return label
def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text
def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard the cached default name so
    that it is regenerated on next use. Returns ``self``.
    """
    self.streamlined = True
    self._defaultName = None
    return self
def recurse(self) -> Sequence["ParserElement"]:
    """
    Return the sub-expressions of this element. This implementation has none
    and always returns a new empty list.
    """
    children: list = []
    return children
def _checkRecursion(self, parseElementList):
    """
    Call ``_checkRecursion`` on every element returned by ``recurse()``, passing
    a copy of ``parseElementList`` extended with ``self``. The caller's list is
    not modified.
    """
    path_with_self = parseElementList[:] + [self]
    for child in self.recurse():
        child._checkRecursion(path_with_self)
def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive
    definitions. The check starts ``_checkRecursion`` with an empty list;
    ``validateTrace`` is accepted but not used here.
    """
    visited: list = []
    self._checkRecursion(visited)
def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, whose
    result is parsed directly, or a file name / path. A path is opened in
    text mode using ``encoding``, read in full, and closed before parsing.

    ``parse_all`` (or its camelCase spelling ``parseAll``) is forwarded to
    ``parse_string``; the flag is on if either one is true.
    """
    parseAll = parseAll or parse_all
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no usable read(): treat the argument as something open() accepts
        with open(file_or_filename, "r", encoding=encoding) as source:
            text = source.read()

    try:
        return self.parse_string(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the very same object compares equal;
    - a string (any ``str_type``) compares equal when this expression
      matches it completely (``matches(other, parse_all=True)``);
    - another ``ParserElement`` compares equal when both objects have equal
      instance attributes (``vars()``);
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False
def __hash__(self):
    """Hash by object identity: returns ``id(self)``."""
    return id(self)
def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of a parser against a test string; handy for simple inline
    microtests of sub-expressions while building up a larger parser.

    Parameters:

    - ``test_string`` - text to test against this expression; it is converted
      with ``str()`` before parsing
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string`
      when running tests; the camelCase ``parseAll`` is combined with it, and
      the flag is on only when both are true

    Returns ``True`` if parsing succeeds and ``False`` if it raises a
    ``ParseBaseException``.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    parseAll = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Each camelCase keyword (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) is combined with its snake_case
    counterpart: flags defaulting to ``True`` stay on only if both are true,
    ``failure_tests`` is on if either is true, and the first non-empty
    callback of ``postParse`` / ``post_parse`` is used.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` tuples, where ``result`` is the
    :class:`ParseResults` of a successful parse or the exception that was raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # split a multiline string into individual, stripped test lines
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, str_type):
        comment = Literal(comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults = []
    comments = []
    success = True
    # a literal backslash-n in a test string stands for a real newline,
    # except where it appears inside a quoted string
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    for t in tests:
        is_comment = comment is not None and comment.matches(t, False)
        # comment lines, and blank lines that follow pending comments, are
        # collected and emitted ahead of the next real test
        if is_comment or (comments and not t):
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            "\n" + "\n".join(comments) if comments else "",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            out.append(pe.explain())
            out.append("FAIL: " + str(pe))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # a failure is a success only when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: {}: {}".format(type(exc).__name__, exc))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # not every callable has __name__ (e.g. functools.partial);
                    # fall back to its repr so reporting the failure cannot
                    # itself raise and abort the remaining tests
                    pp_name = getattr(postParse, "__name__", repr(postParse))
                    out.append(
                        "{} failed: {}: {}".format(pp_name, type(e).__name__, e)
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults
def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - output_html (str, Path, or file-like object) - output target for generated
      diagram HTML; a str or Path is opened for writing as UTF-8, anything
      else is written to through its ``write()`` method
    - vertical (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - show_results_names - bool flag whether diagram should show annotations for
      defined results names
    - show_groups - bool flag whether groups should be highlighted with an unlabeled surrounding box

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` (chained from the ``ImportError``) when the diagram
    support module cannot be imported.
    """
    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    diagram = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )
    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(diagram))
        return

    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(railroad_to_html(diagram))
# camelCase aliases: each name below is bound to the same object as its
# snake_case counterpart defined earlier in this class.
# NOTE(review): presumably retained for backward compatibility with the
# pre-PEP8 API names - confirm before removing any of them.
setDefaultWhitespaceChars = set_default_whitespace_chars
inlineLiteralsUsing = inline_literals_using
setResultsName = set_results_name
setBreak = set_break
setParseAction = set_parse_action
addParseAction = add_parse_action
addCondition = add_condition
setFailAction = set_fail_action
tryParse = try_parse
canParseNext = can_parse_next
resetCache = reset_cache
enableLeftRecursion = enable_left_recursion
enablePackrat = enable_packrat
parseString = parse_string
scanString = scan_string
searchString = search_string
transformString = transform_string
setWhitespaceChars = set_whitespace_chars
parseWithTabs = parse_with_tabs
setDebugActions = set_debug_actions
setDebug = set_debug
defaultName = default_name
setName = set_name
parseFile = parse_file
runTests = run_tests
ignoreWhitespace = ignore_whitespace
leaveWhitespace = leave_whitespace
class _PendingSkip(ParserElement):
    # Internal placeholder class that holds the place where '...' was added to
    # a parser element; once another ParserElement is added to it, this
    # placeholder is replaced with a SkipTo expression.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self):
        # render "<anchor> + Empty" and show the Empty as the pending '...'
        combined = self.anchor + Empty()
        return str(combined).replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        # the expression that follows '...' becomes the SkipTo target; skipped
        # text is collected under the "_skipped" results name
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the
            # placeholder token and the results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the anchor was not found: record that it is missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = "missing <" + repr(self.anchor) + ">"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a placeholder that was never combined with a target cannot parse
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self):
        # the base class is always initialized with ``savelist=False``
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name is the name of the concrete class
        return type(self).__name__
class Empty(Token):
    """
    An empty token, will always match.
    """

    def __init__(self):
        super().__init__()
        # flag that a successful match may consume no input
        self.mayReturnEmpty = True
        self.mayIndexError = False
class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fail at the current location with ``errmsg``
        raise ParseException(instring, loc, self.errmsg, self)
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        # the camelCase keyword wins when it is non-empty
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            raise ValueError("null string passed to Literal; use Empty() instead")
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

        # Performance tuning: plain Literal instances for a one-character
        # string are switched to a subclass whose parseImpl only has to
        # compare a single character
        if type(self) is Literal and self.matchLen == 1:
            self.__class__ = _SingleCharLiteral

    def _generateDefaultName(self):
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character comparison before the full startswith() test
        found = instring[loc] == self.firstMatchChar and instring.startswith(
            self.match, loc
        )
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl only compares the single character at
    # ``loc``; Literal.__init__ switches one-character instances to this class.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
# Register Literal on ParserElement as its ``_literalStringClass``.
# NOTE(review): presumably the class used to wrap plain strings that are
# combined with expressions - confirm against ParserElement's operators.
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must not be immediately preceded or followed by a keyword
    (identifier) character. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``identChars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = "Expected {} {}".format(type(self).__name__, self.name)
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case on both sides
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self):
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at ``loc``; return ``(new_loc, keyword)``.

        Raises ``ParseException`` if the text does not match, or if the match
        is immediately preceded or followed by a keyword character; in the
        latter two cases the message says which, and the reported location
        points at the offending character.
        """
        errmsg = self.errmsg
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match
                    else:
                        # followed by keyword char (message wording kept in
                        # line with the other three branches)
                        errmsg += (
                            ", keyword was immediately followed by keyword character"
                        )
                        errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        else:
            if (
                instring[loc] == self.firstMatchChar
                and self.matchLen == 1
                or instring.startswith(self.match, loc)
            ):
                if loc == 0 or instring[loc - 1] not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen] not in self.identChars
                    ):
                        return loc + self.matchLen, self.match
                    else:
                        # followed by keyword char
                        errmsg += (
                            ", keyword was immediately followed by keyword character"
                        )
                        errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    setDefaultKeywordChars = set_default_keyword_chars
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        defining_string = matchString or match_string
        # the base class stores the upper-cased form used for comparison
        super().__init__(defining_string.upper())
        # Preserve the defining literal.
        self.returnString = defining_string
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # camelCase keywords take precedence when given; the rest is plain
        # Keyword behavior with caseless matching switched on
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # the positional/snake_case argument wins whenever it is given
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected {!r} (with up to {} mismatches)".format(
            self.match_string, self.maxMismatches
        )
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self):
        return "{}:{!r}".format(type(self).__name__, self.match_string)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare the input at ``loc`` with ``match_string`` character by
        character, tolerating up to ``maxMismatches`` differences.

        Returns ``(new_loc, results)`` on success; raises ``ParseException``
        when too little input remains or too many characters differ.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # only attempt a comparison if enough input remains
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches
            caseless = self.caseless

            for match_stringloc, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # Every position was compared within the mismatch budget.
                # Advance exactly over the compared text; deriving the end
                # from the last loop index would consume one unmatched
                # character when match_string is empty.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching ;useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # camelCase keywords take precedence over their snake_case twins
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                "invalid {}, initChars cannot be empty string".format(
                    type(self).__name__
                )
            )

        initChars = set(initChars)
        self.initChars = initChars
        if excludeChars:
            excludeChars = set(excludeChars)
            # in-place difference: self.initChars refers to the same set and
            # therefore loses the excluded characters as well
            initChars -= excludeChars
            if bodyChars:
                bodyChars = set(bodyChars) - excludeChars
        self.initCharsOrig = "".join(sorted(initChars))

        if bodyChars:
            self.bodyCharsOrig = "".join(sorted(bodyChars))
            self.bodyChars = set(bodyChars)
        else:
            # no (remaining) body characters: the body set equals the init set
            self.bodyCharsOrig = "".join(sorted(initChars))
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # see if we can make a regex for this Word
        if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0):
            if self.bodyChars == self.initChars:
                if max == 0:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    repeat = "{{{},{}}}".format(
                        self.minLen, "" if self.maxLen == _MAX_INT else self.maxLen
                    )
                self.reString = "[{}]{}".format(
                    _collapse_string_to_ranges(self.initChars),
                    repeat,
                )
            elif len(self.initChars) == 1:
                if max == 0:
                    repeat = "*"
                else:
                    repeat = "{{0,{}}}".format(max - 1)
                self.reString = "{}[{}]{}".format(
                    re.escape(self.initCharsOrig),
                    _collapse_string_to_ranges(self.bodyChars),
                    repeat,
                )
            else:
                if max == 0:
                    repeat = "*"
                elif max == 2:
                    # at most one body character may follow the initial one,
                    # and it must stay optional: this branch only runs with
                    # min == 1, so a single-character word is valid
                    repeat = "?"
                else:
                    repeat = "{{0,{}}}".format(max - 1)
                self.reString = "[{}][{}]{}".format(
                    _collapse_string_to_ranges(self.initChars),
                    _collapse_string_to_ranges(self.bodyChars),
                    repeat,
                )
            if self.asKeyword:
                self.reString = r"\b" + self.reString + r"\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl below
                self.re = None
            else:
                # switch this instance to the regex-based implementation
                self.re_match = self.re.match
                self.__class__ = _WordRegex

    def _generateDefaultName(self):
        def charsAsStr(s):
            # collapse to range notation and truncate long sets with "..."
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)
            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."
            else:
                return s

        if self.initChars != self.bodyChars:
            base = "W:({}, {})".format(
                charsAsStr(self.initChars), charsAsStr(self.bodyChars)
            )
        else:
            base = "W:({})".format(charsAsStr(self.initChars))

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # a one-character word drops the leading "W:" prefix
                    return base[2:]
                else:
                    return base + "{{{}}}".format(self.minLen)
            elif self.maxLen == _MAX_INT:
                return base + "{{{},...}}".format(self.minLen)
            else:
                return base + "{{{},{}}}".format(self.minLen, self.maxLen)
        return base

    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        # consume body characters up to the maximum length / end of input
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            # too short
            throwException = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # an explicit maximum was given and the word keeps going past it
            throwException = True
        elif self.asKeyword:
            # keyword mode: no body character directly before or after
            if (
                start > 0
                and instring[start - 1] in bodychars
                or loc < instrlen
                and instring[loc] in bodychars
            ):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class _WordRegex(Word):
    # Word variant that matches with the compiled regular expression whose
    # bound ``match`` method is stored in ``re_match``.
    def parseImpl(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)
class Char(_WordRegex):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars
        )
        # build the single-character regex here, since this class always
        # parses through ``re_match``
        pattern = "[{}]".format(_collapse_string_to_ranges(self.initChars))
        if asKeyword:
            pattern = r"\b{}\b".format(pattern)
        self.reString = pattern
        self.re = re.compile(pattern)
        self.re_match = self.re.match
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already-compiled object exposing
        ``pattern`` and ``match`` attributes, in which case it is used
        directly.

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        when ``pattern`` is neither a string nor a compiled RE object.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first read
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled pattern (stdlib ``re`` or a compatible module)
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the parse implementation per instance; asMatch is assigned
        # last, so it wins when both options are given
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    @cached_property
    def re(self):
        """Compiled pattern object: the one passed in, or ``self.pattern``
        compiled with ``self.flags``.

        Raises ``ValueError`` (chained to the original ``re.error``) when the
        pattern string cannot be compiled.
        """
        if self._re:
            return self._re
        try:
            return re.compile(self.pattern, self.flags)
        except re.error as err:
            # keep the underlying error as __cause__ so the reason for the
            # failure (position, message) is not lost
            raise ValueError(
                "invalid pattern ({!r}) passed to Regex".format(self.pattern)
            ) from err

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """``True`` when the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self):
        return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

    def parseImpl(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and a
        :class:`ParseResults` holding the matched text, with every named
        group stored under its group name.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()
        if d:
            for k, v in d.items():
                ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and the tuple from
        ``match.groups()``.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Match at ``loc``; return the end location and the match object
        itself.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when this expression was built with
        ``asGroupList=True``, or with ``asMatch=True`` and a callable ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(asGroupList=True)")

        if self.asMatch and callable(repl):
            raise TypeError("cannot use sub() with a callable with Regex(asMatch=True)")

        if self.asMatch:

            # tokens[0] is the match object, so expand the template on it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # tokens[0] is the matched text; re-run the substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    ws_map = ((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r"))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """Build the matching regex from the quoting parameters.

        Raises ``ValueError`` when the opening or closing quote is empty
        after stripping whitespace, or when the assembled pattern does not
        compile.
        """
        super().__init__()
        # merge the snake_case and camelCase spellings of each argument
        escChar = escChar or esc_char
        escQuote = escQuote or esc_quote
        unquoteResults = unquoteResults and unquote_results
        endQuoteChar = endQuoteChar or end_quote_char
        convertWhitespaceEscapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars - wont work anyway
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if endQuoteChar is None:
            endQuoteChar = quote_char
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                raise ValueError("endQuoteChar cannot be the empty string")

        self.quoteChar = quote_char
        self.quoteCharLen = len(quote_char)
        self.firstQuoteChar = quote_char[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # inner_pattern is an alternation of everything allowed between the
        # quotes; sep becomes "|" once the first alternative has been added
        sep = ""
        inner_pattern = ""

        if escQuote:
            inner_pattern += r"{}(?:{})".format(sep, re.escape(escQuote))
            sep = "|"

        if escChar:
            inner_pattern += r"{}(?:{}.)".format(sep, re.escape(escChar))
            sep = "|"
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"

        if len(self.endQuoteChar) > 1:
            # allow any proper prefix of a multi-character end quote as long
            # as it is not followed by the remainder of the end quote
            inner_pattern += (
                "{}(?:".format(sep)
                + "|".join(
                    "(?:{}(?!{}))".format(
                        re.escape(self.endQuoteChar[:i]),
                        re.escape(self.endQuoteChar[i:]),
                    )
                    for i in range(len(self.endQuoteChar) - 1, 0, -1)
                )
                + ")"
            )
            sep = "|"

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            inner_pattern += r"{}(?:[^{}{}])".format(
                sep,
                _escape_regex_range_chars(self.endQuoteChar[0]),
                (_escape_regex_range_chars(escChar) if escChar is not None else ""),
            )
        else:
            self.flags = 0
            inner_pattern += r"{}(?:[^{}\n\r{}])".format(
                sep,
                _escape_regex_range_chars(self.endQuoteChar[0]),
                (_escape_regex_range_chars(escChar) if escChar is not None else ""),
            )

        self.pattern = "".join(
            [
                re.escape(self.quoteChar),
                "(?:",
                inner_pattern,
                ")*",
                re.escape(self.endQuoteChar),
            ]
        )

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as err:
            raise ValueError(
                "invalid pattern {!r} passed to Regex".format(self.pattern)
            ) from err

        # Single-pass unquoting scanner.  Whitespace escapes and escaped
        # characters are recognised in one left-to-right scan so that an
        # escaped escape character is consumed as a unit and cannot be
        # mistaken for the start of a following whitespace escape.
        self._ws_lookup = dict(self.ws_map)
        unquote_alternatives = []
        if convertWhitespaceEscapes:
            # listed first so that, e.g., backslash-n still becomes a newline
            # when the escape character is itself a backslash
            unquote_alternatives.append(
                "(?P<ws>{})".format(
                    "|".join(re.escape(literal) for literal, _ in self.ws_map)
                )
            )
        if escChar:
            unquote_alternatives.append(
                "{}(?P<esc>.)".format(re.escape(escChar))
            )
        # DOTALL lets an escaped newline (possible in multiline mode) be
        # unescaped as well; non-multiline matches never contain a newline
        self._unquote_scan_re = (
            re.compile("|".join(unquote_alternatives), re.DOTALL)
            if unquote_alternatives
            else None
        )

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self):
        if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type):
            return "string enclosed in {!r}".format(self.quoteChar)

        return "quoted string, starting with {} ending with {}".format(
            self.quoteChar, self.endQuoteChar
        )

    def _unescape_match(self, match):
        """Replacement callback for ``_unquote_scan_re``: map a whitespace
        escape to its whitespace character, or an escaped character to the
        character itself.
        """
        if match.lastgroup == "ws":
            return self._ws_lookup[match.group()]
        return match.group("esc")

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(end_location, text)``.  When ``unquoteResults`` is set the
        delimiters are stripped, whitespace escapes and escaped characters
        are resolved in a single pass, and ``escQuote`` sequences are
        replaced by the end quote.  Raises :class:`ParseException` when the
        text at ``loc`` is not a quoted string.
        """
        # cheap first-character test before running the full regex
        result = (
            self.re_match(instring, loc)
            if instring[loc] == self.firstQuoteChar
            else None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen : -self.endQuoteCharLen]

            if isinstance(ret, str_type):
                # replace escaped whitespace and escaped characters together
                if self._unquote_scan_re is not None:
                    ret = self._unquote_scan_re.sub(self._unescape_match, ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret
class CharsNotIn(Token):
    """Token matching a run of characters that are *not* members of a given
    set.  Whitespace is matched too unless it is listed in the exclusion set
    (see example).  Constructed with the string of disallowed characters and
    optional ``min``, ``max`` and ``exact`` lengths.  ``min`` defaults to 1
    and values below 1 are rejected; ``max`` and ``exact`` default to 0,
    meaning no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimited_list(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        # a non-positive max means "unbounded"
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self):
        excluded = self.notChars
        # the collapsed form only decides whether the name is truncated
        if len(_collapse_string_to_ranges(excluded)) > 16:
            return "!W:({}...)".format(excluded[: 16 - 3])
        return "!W:({})".format(excluded)

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until an excluded character or the
        maximum length is reached; return ``(end_location, matched_text)``.
        Raises :class:`ParseException` when the first character is excluded
        or fewer than ``minLen`` characters were matched.
        """
        forbidden = self.notCharsSet
        if instring[loc] in forbidden:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] not in forbidden:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class White(Token):
    r"""Matching class for significant whitespace.  Whitespace is normally
    ignored by pyparsing grammars; use this class when a whitespace structure
    has to be matched explicitly.  Constructed with a string of the
    whitespace characters to match (default ``" \t\r\n"``) and optional
    ``min``, ``max`` and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when building the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # skip only those known whitespace characters that are NOT being matched
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        # self.leave_whitespace()
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        # a non-positive max means "unbounded"
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

    def _generateDefaultName(self):
        names = White.whiteStrs
        return "".join(names[c] for c in self.matchWhite)

    def parseImpl(self, instring, loc, doActions=True):
        """Consume matching whitespace from ``loc``; return
        ``(end_location, matched_text)``.  Raises :class:`ParseException`
        when the first character does not match or fewer than ``minLen``
        characters were consumed.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
class PositionToken(Token):
    """Common base of the position-testing tokens defined below; initialises
    ``mayReturnEmpty`` to ``True`` and ``mayIndexError`` to ``False``.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; useful for
    scraping tabular reports.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column
        is reached, a non-space character is found, or the input ends.
        """
        wanted = self.col
        if col(loc, instring) == wanted:
            return loc

        total = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (
            loc < total
            and instring[loc].isspace()
            and col(loc, instring) != wanted
        ):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column; raises
        :class:`ParseException` when ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        stop = loc + self.col - current
        return stop, instring[loc:stop]
class LineStart(PositionToken):
    r"""Matches when the current position is at the beginning of a line
    within the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before newline is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Skip non-newline whitespace; when newline was originally a
        whitespace character, also step over newlines together with the
        whitespace that follows each of them.
        """
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)
        if "\n" in self.orig_whiteChars:
            # slicing (rather than indexing) stays safe at end of input
            while instring[pos : pos + 1] == "\n":
                pos = self.skipper.preParse(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class LineEnd(PositionToken):
    """Matches when the current position is at the end of a line within the
    parse string.
    """

    def __init__(self):
        super().__init__()
        # newline must not be skipped as whitespace, since it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc + 1, "\\n")`` at a newline and ``(loc + 1, [])`` at
        the end of the input; raise :class:`ParseException` anywhere else.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
class StringStart(PositionToken):
    """Matches when the current position is at the beginning of the parse
    string.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # a non-zero location is still accepted when everything before it is
        # skipped by preParse starting from position 0
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class StringEnd(PositionToken):
    """
    Matches when the current position is at the end of the parse string.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc + 1, [])`` exactly at the end of the input and
        ``(loc, [])`` past it; raise :class:`ParseException` before the end.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc is already beyond the end of the input
        return loc, []
class WordStart(PositionToken):
    r"""Matches when the current position is at the beginning of a
    :class:`Word` and is not preceded by any character in a given set of
    ``word_chars`` (default= ``printables``).  To emulate the ``\b``
    behavior of regular expressions, use ``WordStart(alphanums)``.
    ``WordStart`` will also match at the beginning of the string being
    parsed, or at the beginning of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase keyword wins only when it was changed from its default
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        if loc == 0:
            return loc, []

        members = self.wordChars
        # fail when the previous character is a word character, or when the
        # current character is not one
        if instring[loc - 1] in members or instring[loc] not in members:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class WordEnd(PositionToken):
    r"""Matches when the current position is at the end of a :class:`Word`
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``).  To emulate the ``\b`` behavior of regular
    expressions, use ``WordEnd(alphanums)``.  ``WordEnd`` will also match at
    the end of the string being parsed, or at the end of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase keyword wins only when it was changed from its default
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        total = len(instring)
        # at or beyond the end of the input there is nothing left to check
        if not (total > 0 and loc < total):
            return loc, []

        members = self.wordChars
        # fail when the current character is a word character, or when the
        # previous character is not one
        if instring[loc] in members or instring[loc - 1] not in members:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, used for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            members = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if any(isinstance(member, str_type) for member in members):
                members = [
                    self._literalStringClass(member)
                    if isinstance(member, str_type)
                    else member
                    for member in members
                ]
            self.exprs = members
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all: treat it as a single contained item
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> Sequence[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions, reset the cached
        default name, and return ``self``.
        """
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        copies of all contained expressions when ``recursive`` is true.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        # work on copies so the original contained expressions are untouched
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        copies of all contained expressions when ``recursive`` is true.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        # work on copies so the original contained expressions are untouched
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained
        expression.  A :class:`Suppress` that is already registered is not
        added a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self):
        return "{}:({})".format(self.__class__.__name__, str(self.exprs))

    def streamline(self) -> ParserElement:
        """Streamline this expression and its contained expressions, and
        flatten a directly nested expression of the same class.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def can_merge(candidate):
            # same class, and nothing attached that flattening would lose
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if can_merge(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            # read after the step above, which may have replaced self.exprs
            tail = self.exprs[-1]
            if can_merge(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = "Expected " + str(self)

        return self

    def validate(self, validateTrace=None) -> None:
        """Validate every contained expression, passing along the trace
        extended with ``self``, then run the recursion check on ``self``.
        """
        prior = [] if validateTrace is None else validateTrace
        trace = prior[:] + [self]
        for e in self.exprs:
            e.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are copies as well."""
        duplicate = super().copy()
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        warning_kind = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and warning_kind not in self.suppress_warnings_
        ):
            for e in self.exprs:
                if not (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and warning_kind not in e.suppress_warnings_
                ):
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        e.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

    # camelCase aliases for the two whitespace methods
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker expression: once parseImpl meets one, failures of the
        # expressions after it are raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self):
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence from ``exprs_arg``.

        Each ``...`` (``Ellipsis``) entry is replaced by a ``SkipTo`` the
        expression that follows it, with results name ``"_skipped*"``.
        Raises ``Exception`` when ``...`` is the last entry.
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is Ellipsis:
                    if i < len(exprs) - 1:
                        # adding to Empty() converts the next item to a parser
                        # element; the converted element is then taken back out
                        skipto_arg: ParserElement = (Empty() + exprs[i + 1]).exprs[-1]
                        tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    else:
                        raise Exception(
                            "cannot construct And with sequence ending in ..."
                        )
                else:
                    tmp.append(expr)
            exprs[:] = tmp
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling is taken from the first expression, unless
            # that expression is a White, in which case skipping is disabled
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Resolve pending skips, run the base-class streamlining, link
        ``IndentedBlock`` expressions to the expression before them, and
        recompute ``mayReturnEmpty``.  Returns ``self``.
        """
        # collapse any _PendingSkip's
        if self.exprs:
            if any(
                isinstance(e, ParseExpression)
                and e.exprs
                and isinstance(e.exprs[-1], _PendingSkip)
                for e in self.exprs[:-1]
            ):
                for i, e in enumerate(self.exprs[:-1]):
                    if e is None:
                        continue
                    if (
                        isinstance(e, ParseExpression)
                        and e.exprs
                        and isinstance(e.exprs[-1], _PendingSkip)
                    ):
                        # combine the pending skip with the following
                        # expression, and mark that expression for removal
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        self.exprs[i + 1] = None
                self.exprs = [e for e in self.exprs if e is not None]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while cur:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # when prev matches, store the column of its match
                    # location on the block as ``parent_anchor``; cur is bound
                    # as a default argument to capture its current value
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                cur = next(iter(subs), None)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order, starting at ``loc``.

        Returns the final location and the accumulated results.  After an
        ``_ErrorStop`` marker has been passed, a ``ParseBaseException`` or
        ``IndexError`` from a later expression is re-raised as
        ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # if isinstance(e, And._ErrorStop):
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    # running off the end of the input is reported at
                    # len(instring)
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that hold tokens or named entries
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support ``+=``: append ``other`` (a string is first wrapped with
        ``_literalStringClass``) to this expression in place.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop after the first expression that cannot return empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self):
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the stepped slice picks just the first and last characters)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return "{" + inner + "}"
class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            # no alternatives at all
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline via the parent class, then recompute the flags that are
        derived from the alternatives."""
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        # whitespace is skipped only if every alternative skips it and none is a White
        self.skipWhitespace = not any(
            not e.skipWhitespace or isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and return the result of the longest match.

        Raises the furthest ``ParseFatalException`` if any alternative raised
        one, otherwise the furthest ``ParseException`` (with its ``msg``
        replaced by ``self.errmsg``).
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        fatal_errors = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: find out how far each alternative would get
        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parserElement = alt
                fatal_errors.append(pfe)
                # a fatal error discards any ordinary failure recorded so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if fatal_errors:
                    continue
                err.__traceback__ = None
                if err.loc > furthest_loc:
                    furthest_exc, furthest_loc = err, err.loc
            except IndexError:
                text_len = len(instring)
                if text_len > furthest_loc:
                    furthest_exc = ParseException(
                        instring, text_len, alt.errmsg, self
                    )
                    furthest_loc = text_len
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case
            # attached actions might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                _, best_expr = candidates[0]
                return best_expr._parse(instring, loc, doActions)

            best_loc, best_toks = -1, None
            for expected_loc, alt in candidates:
                if expected_loc <= best_loc:
                    # already have a longer match than this one will deliver, we are done
                    return best_loc, best_toks

                try:
                    actual_loc, toks = alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc, furthest_loc = err, err.loc
                else:
                    if actual_loc >= expected_loc:
                        return actual_loc, toks
                    # didn't match as much as before
                    if actual_loc > best_loc:
                        best_loc, best_toks = actual_loc, toks

            if best_loc != -1:
                return best_loc, best_toks

        if fatal_errors:
            if len(fatal_errors) > 1:
                fatal_errors.sort(key=lambda e: -e.loc)
                if fatal_errors[0].loc == fatal_errors[1].loc:
                    # tie on location: prefer the longer element description
                    fatal_errors.sort(
                        key=lambda e: (-e.loc, -len(str(e.parserElement)))
                    )
            raise fatal_errors[0]

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        """Implement ``expr ^= other``; strings are wrapped as literals first."""
        operand = other
        if isinstance(operand, str_type):
            operand = self._literalStringClass(operand)
        return self.append(operand)  # Or([self, other])

    def _generateDefaultName(self):
        return "{" + " ^ ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        """Warn (when the diagnostic is enabled and not suppressed) if any
        alternative is an ``And``, then delegate to the parent class."""
        diag_flag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag_flag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag_flag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warnings.warn(
                "{}: setting results name {!r} on {} expression "
                "will return a list of all parsed tokens in an And alternative, "
                "in prior versions only the first token was returned; enclose "
                "contained argument in Group".format(
                    "warn_multiple_tokens_in_named_alternation",
                    name,
                    type(self).__name__,
                ),
                stacklevel=3,
            )

        return super()._setResultsName(name, listAllMatches)
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            # no alternatives at all
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the alternatives."""
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        # whitespace is skipped only if every alternative skips it and none is a White
        self.skipWhitespace = not any(
            not e.skipWhitespace or isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` from an alternative is re-raised at once.
        If every alternative fails, the failure with the greatest ``loc`` is
        raised with its ``msg`` replaced by ``self.errmsg``.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parserElement = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc, furthest_loc = err, err.loc
            except IndexError:
                text_len = len(instring)
                if text_len > furthest_loc:
                    furthest_exc = ParseException(
                        instring, text_len, alt.errmsg, self
                    )
                    furthest_loc = text_len

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Implement ``expr |= other``; strings are wrapped as literals first."""
        operand = other
        if isinstance(operand, str_type):
            operand = self._literalStringClass(operand)
        return self.append(operand)  # MatchFirst([self, other])

    def _generateDefaultName(self):
        return "{" + " | ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        """Warn (when the diagnostic is enabled and not suppressed) if any
        alternative is an ``And``, then delegate to the parent class."""
        diag_flag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag_flag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag_flag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warnings.warn(
                "{}: setting results name {!r} on {} expression "
                "will return a list of all parsed tokens in an And alternative, "
                "in prior versions only the first token was returned; enclose "
                "contained argument in Group".format(
                    "warn_multiple_tokens_in_named_alternation",
                    name,
                    type(self).__name__,
                ),
                stacklevel=3,
            )

        return super()._setResultsName(name, listAllMatches)
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        self.mayReturnEmpty = (
            all(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
        )
        self.skipWhitespace = True
        # the expression groups are built lazily on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self) -> ParserElement:
        super().streamline()
        self.mayReturnEmpty = (
            all(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match all required expressions (and any optional ones present) in
        whatever order they occur, then re-parse them in the discovered order
        to build the results.

        Raises the furthest ``ParseFatalException`` seen in the last scan
        round, or a ``ParseException`` naming the required expressions that
        were never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            opt_wrappers = [e for e in self.exprs if isinstance(e, Opt)]
            self.opt1map = {id(wrapper.expr): wrapper for wrapper in opt_wrappers}
            opt1 = [wrapper.expr for wrapper in opt_wrappers]
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ] + self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = list(self.required)
        pending_optional = list(self.optionals)
        repeatables = list(self.multioptionals)
        match_order = []

        failed = []
        fatals = []
        while True:
            attempt = pending_required + pending_optional + repeatables
            failed.clear()
            fatals.clear()
            for expr in attempt:
                try:
                    scan_loc = expr.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parserElement = expr
                    fatals.append(pfe)
                    failed.append(expr)
                except ParseException:
                    failed.append(expr)
                else:
                    # record the Opt wrapper rather than its inner expression
                    match_order.append(self.opt1map.get(id(expr), expr))
                    if expr in pending_required:
                        pending_required.remove(expr)
                    elif expr in pending_optional:
                        pending_optional.remove(expr)
            # stop once a full round produced no match at all
            if len(failed) == len(attempt):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer element description
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                "Missing one or more required elements ({})".format(missing),
            )

        # add any unmatched Opts, in case they have default values defined
        match_order.extend(
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        )

        total_results = ParseResults([])
        for matched in match_order:
            loc, results = matched._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self):
        return "{" + " & ".join(map(str, self.exprs)) + "}"
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        """Wrap ``expr``, copying its parsing flags onto this element.

        A string ``expr`` is converted to a parser element using
        ``self._literalStringClass`` and/or ``Literal``.  ``expr`` may be
        ``None``, in which case no flags are copied.
        """
        super().__init__(savelist)
        if isinstance(expr, str_type):
            if issubclass(self._literalStringClass, Token):
                expr = self._literalStringClass(expr)
            elif issubclass(type(self), self._literalStringClass):
                # avoid wrapping in our own class; use a plain Literal instead
                expr = Literal(expr)
            else:
                expr = self._literalStringClass(Literal(expr))
        self.expr = expr
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.set_whitespace_chars(
                expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
            )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> Sequence[ParserElement]:
        """Return the contained expression as a one-element list (empty if none)."""
        return [self.expr] if self.expr is not None else []

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate parsing to the contained expression.

        Raises ``ParseException`` if no expression has been defined.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException(instring, loc, "No expression defined", self)

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``leave_whitespace`` to this element and, when ``recursive``,
        to a private copy of the contained expression.  Returns ``self``.
        """
        super().leave_whitespace(recursive)

        # guard before copying: expr may legitimately be None
        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``ignore_whitespace`` to this element and, when ``recursive``,
        to a private copy of the contained expression.  Returns ``self``.
        """
        super().ignore_whitespace(recursive)

        # guard before copying: expr may legitimately be None
        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the contained expression.

        A ``Suppress`` that is already in ``self.ignoreExprs`` is not added
        again.  Returns ``self``.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super().ignore(other)
        if self.expr is not None:
            # pass on the entry the parent class just appended
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and the contained expression.  Returns ``self``."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if ``self`` already appears in
        ``parseElementList``; otherwise continue the check in the contained
        expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None) -> None:
        """Validate the contained expression, then run the recursion check
        starting from this element."""
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self):
        return "{}:({})".format(self.__class__.__name__, str(self.expr))

    # camelCase aliases of the snake_case methods
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Zero-width element that succeeds only at column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width element that succeeds only at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; 1 for an outermost block
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        """Build, for the indentation found at ``loc``, a one-off grammar of
        repeated ``self.expr`` lines at that column and parse with it."""
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an
        # exception and no further work is necessary
        # NOTE(review): doActions is passed as try_parse's third positional
        # argument; other calls in this file pass raise_fatal by keyword there.
        # Confirm against try_parse's signature.
        self.expr.try_parse(instring, anchor_loc, doActions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        line_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper_level + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        # wrap the block in a Group only when grouping was requested
        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, doActions
        )
class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, but only when ``loc`` is 0."""
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")
class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + restOfLine).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, but only when ``loc`` is in column 1."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the lookahead expression and return the original ``loc``
        together with its results emptied of list items."""
        # parsing through self.expr and then clearing only the list contents of the
        # returned ParseResults keeps any named results defined in the lookahead
        _, lookahead = self.expr._parse(instring, loc, doActions=doActions)
        del lookahead[:]

        return loc, lookahead
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - expr - expression that must match prior to the current parse
      location
    - retreat - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False

        # work out whether the lookbehind distance is known from the expression
        fixed_width = True
        if isinstance(expr, str_type):
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            fixed_width = False

        self.exact = fixed_width
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # always hand back an empty token list (named results are kept)
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Verify that the lookbehind expression matches just before ``loc``.

        Returns ``loc`` unchanged with the lookbehind results; raises the
        last parse failure if no offset within the window matches.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        anchored = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_failure = ParseException(instring, loc, self.errmsg)
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                last_failure = pbe
                continue
            break
        else:
            # no offset produced a match
            raise last_failure
        return loc, ret
class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression and return ``[start, tokens, end]``
        with the three results names attached."""
        begin = loc
        end, tokens = self.expr._parse(instring, begin, doActions, callPreParse=False)
        located = ParseResults([begin, tokens, end])
        located["locn_start"] = begin
        located["value"] = tokens
        located["locn_end"] = end
        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list only if the wrapped expression
        cannot parse at ``loc``."""
        if not self.expr.can_parse_next(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self):
        return f"~{{{self.expr!s}}}"
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition elements: matches the wrapped
    expression one or more times, optionally stopping at a sentinel."""

    def __init__(
        self,
        expr: ParserElement,
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        # the camelCase keyword takes precedence over the snake_case one
        stopOn = stopOn or stop_on
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the terminating sentinel.  Returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression once (failures propagate), then keep
        matching until it fails or the sentinel is seen."""
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        not_ender = self.not_ender
        reject_ender = not_ender.tryParse if not_ender is not None else None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_one(instring, loc, doActions)

        has_ignorables = bool(self.ignoreExprs)
        try:
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more = parse_one(instring, preloc, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # the repetition ends at the first failed attempt
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Warn (when the diagnostic is enabled and not suppressed) about each
        contained expression that already has a results name, then delegate
        to the parent class."""
        diag_flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag_flag not in self.suppress_warnings_
        ):
            for e in [self.expr] + self.expr.recurse():
                if not (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and diag_flag not in e.suppress_warnings_
                ):
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        e.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
    - expr - expression that must match one or more times
    - stop_on - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self):
        return f"{{{self.expr!s}}}..."
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: ParserElement,
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr, stopOn=stopOn or stop_on)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Run the one-or-more match; if it fails, succeed with empty results
        at the original ``loc``."""
        try:
            matched = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            matched = loc, ParseResults([], name=self.resultsName)
        return matched

    def _generateDefaultName(self):
        return f"[{self.expr!s}]..."
class _NullToken:
    """Placeholder object that is falsy and renders as the empty string."""

    def __str__(self):
        return ""

    def __bool__(self):
        return False
class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
    - ``expr`` - expression that must match zero or more times
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression; if it fails, succeed at ``loc`` with
        the default value (when one was supplied) or with no tokens."""
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif inner.resultsName:
                # keep the default reachable under the inner results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self):
        body = str(self.expr)
        # strip off redundant inner {}'s
        while len(body) > 1 and body[0] == "{" and body[-1] == "}":
            body = body[1:-1]
        return f"[{body}]"


# alternate name for Opt
Optional = Opt
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression;
      a plain string is converted to a literal expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        # the legacy camelCase keyword wins when both spellings are given
        failOn = failOn or fail_on
        # parseImpl calls ``tryParse`` on the ignore expression, so a plain
        # string must be promoted to a parser element (same as for failOn)
        if isinstance(ignore, str_type):
            ignore = self._literalStringClass(ignore)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns ``(new_loc, ParseResults)`` holding the skipped text (plus the
        target's tokens when ``include`` was set).  Raises ``ParseException``
        if the end of input is reached without a match.  If the ``fail_on``
        expression matches first, scanning stops at that position and the text
        skipped so far is returned.
        """
        startloc = loc
        instrlen = len(instring)
        # bind the bound methods once; they are called on every scan position
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        self_ignoreExpr_tryParse = (
            self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
        )

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while True:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: actions are run later if the match is included
                self_expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives. It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # remember where this Forward was created; __del__ reports this
        # location if no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)
        # stack entry of the most recent '<<' call, compared in __or__
        self.lshift_line = None

    def __lshift__(self, other):
        """Attach ``other`` as the contained expression and return ``self``.

        A plain string is converted with ``_literalStringClass``.  Parsing
        attributes (index-error/empty flags, whitespace settings, saveAsList,
        ignore expressions) are copied from the attached expression.
        """
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        self.expr = other
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # record the calling line so __or__ can detect "fwd << a | b"
        self.lshift_line = traceback.extract_stack(limit=2)[-2]
        return self

    def __ilshift__(self, other):
        # '<<=' delegates to '<<'
        return self << other

    def __or__(self, other):
        """Build the alternation via the superclass, first warning when the
        ``|`` comes from the same stack entry that was recorded by ``<<``."""
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # attribute the warning to the line that created this Forward
            warnings.warn_explicit(
                "Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, doActions=True):
        """Parse using the contained expression.

        Warns (when the diagnostic is enabled) if no expression was ever
        attached.  With left recursion disabled this simply delegates to the
        superclass; otherwise the bounded-recursion algorithm described in the
        comments below is applied, using ``ParserElement.recursion_memos``
        under ``ParserElement.recursion_lock``.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = [
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            ]
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                # none of the public entry points found on the stack
                stacklevel = 2
            warnings.warn(
                "Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, doActions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `doActions=False` and only run `doActions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, doActions]
                if isinstance(prev_result, Exception):
                    # a memoized failure is replayed by re-raising it
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `doActions` cases must be tracked separately here!
                # seed the memo with a failure located before `loc`, so that any
                # real match counts as an improvement
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if doActions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if doActions:
                            # replace the match for doActions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, prev_result.copy()
                        del memo[peek_key]
                        return prev_loc, prev_peek.copy()
                    # the match did get better: see if we can improve further
                    else:
                        if doActions:
                            try:
                                memo[act_key] = super().parseImpl(instring, loc, True)
                            except ParseException as e:
                                # memoize the failure for both keys before propagating
                                memo[peek_key] = memo[act_key] = (new_loc, e)
                                raise
                        prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        # mark as streamlined *before* descending, so a self-referencing
        # grammar does not recurse back into this element
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Validate the contained expression, skipping the descent when this
        element is already present in ``validateTrace``."""
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self):
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        retString = "..."
        try:
            if self.expr is not None:
                # cap the length of the embedded expression text
                retString = str(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            # NOTE: returning from ``finally`` discards any exception raised
            # while building the string; the "..." placeholder is used then
            return self.__class__.__name__ + ": " + retString

    def copy(self) -> ParserElement:
        """Return a copy; an empty Forward is copied as a new Forward that
        wraps this one, so a later assignment to the original is seen."""
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False):
        # optionally warn that a results name is being set before any
        # expression has been attached
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward
            not in self.suppress_warnings_
        ):
            if self.expr is None:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "that has no contained expression".format(
                        "warn_name_set_on_empty_Forward", name, type(self).__name__
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, list_all_matches)

    # camelCase aliases for the two whitespace methods
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted for signature compatibility but is not
        # forwarded; converters always start with saveAsList switched off
        super().__init__(expr)
        self.saveAsList = False
class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the legacy keyword takes priority whenever it was given explicitly
        if joinString is None:
            joinString = join_string
        if adjacent:
            # turn off whitespace skipping inside the combined expression ...
            self.leave_whitespace()
        # ... while the Combine itself still skips leading whitespace
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        if not self.adjacent:
            super().ignore(other)
        else:
            # adjacent mode: use the base ParserElement implementation directly
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from a copy (keeps any named results), then swap its list
        # contents for the single joined string
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    handy for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore`
    expressions.

    When the optional ``aslist`` argument is True, the parsed tokens are
    returned as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(delimited_list(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(delimited_list(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        if not self._asPythonList:
            # default: nest the tokens one level deeper
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.asList()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also
    as a dictionary: each element can be looked up using the first token
    of that element as its key. Useful for tabular report scraping when
    the first column can serve as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for index, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            # the first token of each entry is its key; int keys become strings
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if len(entry) == 1:
                # key with no value
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                # key with one plain value
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                # collapse a lone, unnamed value to that value itself
                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, index)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`delimited_list`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)``: stand in a pending skip until the next operand
        # of '+' or '-' supplies the target to skip to
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        # resolve the pending skip now that its target is known
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        # resolve the pending skip now that its target is known
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # drop every matched token
        return []

    def suppress(self) -> ParserElement:
        # already suppressed; nothing further to wrap
        return self
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Before the wrapped parse action runs, a line of the form
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written to ``sys.stderr``.  Afterwards a ``"<<leaving ..."`` line is
    written carrying either the returned value or the exception that the
    parse action raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        thisFunc = f.__name__
        if len(paArgs) > 3:
            # an extra leading argument is the owning object: qualify the name
            owner = paArgs[0].__class__.__name__
            thisFunc = owner + "." + thisFunc

        entering = ">>entering {}(line: {!r}, {}, {!r})\n".format(
            thisFunc, line(l, s), l, t
        )
        sys.stderr.write(entering)
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write("<<leaving {} (exception: {})\n".format(thisFunc, exc))
            raise
        else:
            sys.stderr.write("<<leaving {} (ret: {!r})\n".format(thisFunc, ret))
            return ret

    z.__name__ = f.__name__
    return z
# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# --- mini-grammar used by srange() to parse "[...]" character-set strings ---

# backslash followed by one punctuation character -> that character
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).set_parse_action(
    lambda s, l, t: t[0][1]
)
# "\x##" / "\X##" (optionally "\0x##") -> character with that hex code.
# The prefix is removed in two steps: first the backslash and optional '0',
# then the 'x'/'X' marker.  A single lstrip(r"\0x") would also eat leading
# zero digits (turning "\x00" into "") and would not remove an uppercase 'X',
# both of which make int(..., 16) fail.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lstrip("\\0").lstrip("xX"), 16))
)
# "\0###" -> character with that octal code (text after the backslash)
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one set member: an escape, or a single char other than '\' and ']'
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + "]"
)
def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. If the
    input cannot be processed, an empty string is returned. The values
    enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def _expanded(p):
        # single characters pass through; a grouped [first, last] pair is
        # expanded to every character in that inclusive range
        if isinstance(p, ParseResults):
            first = ord(p[0])
            last = ord(p[1])
            return "".join(map(chr, range(first, last + 1)))
        return p

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        # best effort: any parse or conversion failure yields an empty set
        return ""
def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. Any additional args are
    forwarded to the given function as extra arguments after the token,
    as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action takes ``(s, l, t)`` and is named after
    ``func`` (or after its class when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # callables without a __name__ fall back to their class name
    class_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", class_name)

    return pa
def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable in the caller's frame that is a
    :class:`ParserElement` without a custom name is given the
    variable's name via ``set_name``.
    """
    caller_locals = sys._getframe().f_back.f_locals
    for name, var in caller_locals.items():
        if not isinstance(var, ParserElement):
            continue
        if var.customName:
            # leave explicitly named elements alone
            continue
        var.set_name(name)
# Pre-built quoted-string expressions.  Inside the quotes the regex accepts:
# any character other than the quote, newline, carriage return or backslash;
# a doubled quote character; or a backslash escape (backslash + one non-'x'
# character, or backslash + 'x' + hex digits).
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either quoting style; the double-quoted alternative is tried first
quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("quotedString using single or double quotes")

# a quoted string prefixed with 'u'
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets built with srange(): code points 0xc0-0xff except 0xd7 and
# 0xf7, and code points 0xa1-0xbf plus 0xd7 and 0xf7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement bound in this namespace at this point)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# backward compatibility names
# (camelCase aliases bound to the same objects as the snake_case names)
tokenMap = token_map
conditionAsParseAction = condition_as_parse_action
nullDebugAction = null_debug_action
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
traceParseAction = trace_parse_action