Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pyparsing/core.py: 44%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2637 statements  

1# 

2# core.py 

3# 

4from __future__ import annotations 

5 

6import collections.abc 

7from collections import deque 

8import os 

9import typing 

10from typing import ( 

11 Any, 

12 Callable, 

13 Generator, 

14 NamedTuple, 

15 Sequence, 

16 TextIO, 

17 Union, 

18 cast, 

19) 

20from abc import ABC, abstractmethod 

21from enum import Enum 

22import string 

23import copy 

24import warnings 

25import re 

26import sys 

27from collections.abc import Iterable 

28import traceback 

29import types 

30from operator import itemgetter 

31from functools import wraps 

32from threading import RLock 

33from pathlib import Path 

34 

35from .util import ( 

36 _FifoCache, 

37 _UnboundedCache, 

38 __config_flags, 

39 _collapse_string_to_ranges, 

40 _escape_regex_range_chars, 

41 _bslash, 

42 _flatten, 

43 LRUMemo as _LRUMemo, 

44 UnboundedMemo as _UnboundedMemo, 

45 replaced_by_pep8, 

46) 

47from .exceptions import * 

48from .actions import * 

49from .results import ParseResults, _ParseResultsWithOffset 

50from .unicode import pyparsing_unicode 

51 

# Alias for the platform's sys.maxsize.
_MAX_INT = sys.maxsize
# Tuple of the built-in text and bytes types.
# NOTE(review): presumably used for isinstance() checks of string-like arguments - confirm at use sites.
str_type: tuple[type, ...] = (str, bytes)

54 

55# 

56# Copyright (c) 2003-2022 Paul T. McGuire 

57# 

58# Permission is hereby granted, free of charge, to any person obtaining 

59# a copy of this software and associated documentation files (the 

60# "Software"), to deal in the Software without restriction, including 

61# without limitation the rights to use, copy, modify, merge, publish, 

62# distribute, sublicense, and/or sell copies of the Software, and to 

63# permit persons to whom the Software is furnished to do so, subject to 

64# the following conditions: 

65# 

66# The above copyright notice and this permission notice shall be 

67# included in all copies or substantial portions of the Software. 

68# 

69# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

70# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

71# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

72# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

73# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

74# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

75# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

76# 

77 

78from functools import cached_property 

79 

80 

class __compat__(__config_flags):
    """
    Cross-version compatibility switches for pyparsing.

    The flags defined here gate features that are scheduled for a future
    release. Setting a flag to True turns the feature on in an earlier
    version, so that code can be developed and tested against it ahead of time.

    - ``collect_all_And_tokens`` - flag to enable the fix for Issue #63, which
      corrects erroneous grouping of results names when an :class:`And`
      expression is nested within an :class:`Or` or :class:`MatchFirst`;
      kept for compatibility, but setting it to ``False`` no longer restores
      pre-2.3.1 behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every non-underscore name bound so far in this class body is a flag name
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]

102 

103 

class __diag__(__config_flags):
    """Container for the global diagnostic flags; each flag is initially False."""

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # every non-underscore name bound so far in this class body is a flag name
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Call ``cls.enable`` once for every flag whose name starts with ``warn``."""
        for warning_name in cls._warning_names:
            cls.enable(warning_name)

124 

125 

class Diagnostics(Enum):
    """
    Names of the global diagnostic flags (every one is disabled by default).

    - ``warn_multiple_tokens_in_named_alternation`` - enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression that has one
      or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - enable warnings when a results
      name is defined on a containing expression whose ungrouped subexpressions
      also have results names
    - ``warn_name_set_on_empty_Forward`` - enable warnings when a :class:`Forward` is
      given a results name but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - enable warnings when a :class:`Forward`
      is part of a grammar but never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - enable warnings when a :class:`Forward` is
      defined but then overwritten by assigning with ``'='`` instead of ``'<<='``
      or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - enable warnings when
      :class:`one_of` is incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - NOTE(review): not described in
      the original list; presumably warns when ``'<<'`` is combined with ``'|'`` on a
      :class:`Forward` - confirm against its use site
    - ``enable_debug_on_named_expressions`` - automatically enable debug on all
      subsequent calls to :class:`ParserElement.set_name`

    Individual diagnostics are switched on and off with :class:`enable_diag` and
    :class:`disable_diag`; :class:`enable_all_warnings` turns on every warning.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

158 

159 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag (see :class:`Diagnostics`).

    The flag is looked up on ``__diag__`` by the enum member's name.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

165 

166 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag (see :class:`Diagnostics`).

    The flag is looked up on ``__diag__`` by the enum member's name.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

172 

173 

def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning (see :class:`Diagnostics`).

    Delegates to ``__diag__.enable_all_warnings()``.
    """
    __diag__.enable_all_warnings()

179 

180 

# hide abstract class: drop the imported base-class name from this module's
# namespace now that __compat__ and __diag__ have been defined from it
del __config_flags

183 

184 

def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether all pyparsing warnings should be switched on.

    The starting answer is the truthiness of ``warn_env_var``. Each entry of
    ``cmd_line_warn_options`` is then read as colon-separated
    ``action:message:category:module:line`` fields (missing fields are
    empty), and the entries are applied in order, so later ones win:

    - an action that does not start with "i" enables warnings when the module
      field is ``pyparsing``, or when message, category and module are all empty
    - an action that starts with "i" disables warnings when the module field is
      ``pyparsing`` or empty

    Returns the resulting flag.
    """
    enable = bool(warn_env_var)
    for warn_opt in cmd_line_warn_options:
        # pad with empty fields so a short option still unpacks into 5 parts
        fields = (warn_opt + "::::").split(":")[:5]
        w_action, w_message, w_category, w_module, w_line = fields
        if w_action.lower().startswith("i"):
            if w_module in ("pyparsing", ""):
                enable = False
        elif w_module == "pyparsing" or not (w_message or w_category or w_module):
            enable = True
    return enable

200 

201 

# At import time, enable all diagnostic warnings if the interpreter's -W options
# (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS environment variable ask for it.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

206 

207 

# build list of single arg builtins, that can be used as parse actions
# (_trim_arity calls these with just the parsed tokens)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# return type of parseImpl: (new location, tokens)
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition predicate may take 0 to 3 of the (s, loc, toks) arguments
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# signatures of the fail action and of the three debug callbacks
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# commonly used character-set strings
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])

240 

241 

class _ParseActionIndexError(Exception):
    """
    Internal carrier for an IndexError that was raised inside a parse action.

    Wrapping keeps such errors from being mistaken for IndexErrors raised
    inside ParserElement parseImpl methods.
    """

    def __init__(self, msg: str, exc: BaseException) -> None:
        # descriptive text, and the original exception that is being wrapped
        self.msg: str = msg
        self.exc: BaseException = exc

252 

253 

# Frame summary of the extract_stack() call line inside _trim_arity; filled in
# the first time _trim_arity runs and reused afterwards.
_trim_arity_call_line: traceback.FrameSummary = None  # type: ignore[assignment]
# (filename, line number) synthesized for the `func(*args[limit:])` call made in
# the `while` loop of wrapper; empty until _trim_arity has run once.
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """Decorator to trim function calls to match the arity of the target.

    Returns a wrapper that is called with the full argument list and forwards
    only ``args[limit:]`` to ``func``. ``limit`` starts at 0; each time the call
    itself fails with a TypeError (the traceback's last frame is the call line
    in the wrapper, not a line inside ``func``), ``limit`` is increased by one
    and the call is retried, up to ``max_limit``. After the first successful
    call the discovered ``limit`` is reused without further probing.

    Members of ``_single_arg_builtins`` are special-cased and always called
    with just the third argument.

    TypeErrors that come from inside ``func`` are re-raised unchanged.
    IndexErrors raised by ``func`` are re-raised as ``_ParseActionIndexError``.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    # compare (filename, lineno) of that frame with the synthesized call line
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

320 

321 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a simple predicate (a function returning ``True`` or ``False``) so that it
    can be used as a parse action. Useful wherever a parse action is required and
    :class:`ParserElement.add_condition` is not available (such as when adding a
    condition to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - custom message for the raised exception; when omitted,
      ``"failed user-defined condition"`` is used
    - ``fatal`` - if True, a falsy predicate result raises
      :class:`ParseFatalException` to stop parsing immediately; otherwise
      :class:`ParseException` is raised

    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message
    exc_type = ParseException if not fatal else ParseFatalException
    # let the predicate accept anywhere from 0 to 3 of the (s, l, t) arguments
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if bool(fn(s, l, t)):
            return None
        raise exc_type(s, l, msg)

    return pa

348 

349 

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "about to try a match" debug action.

    Prints the expression and location (with line and column), followed by
    the source line and a caret under the column. The output is prefixed
    with ``*`` when ``cache_hit`` is true.
    """
    marker = "*" if cache_hit else ""
    headline = f"{marker}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})"
    source_line = f"  {line(loc, instring)}"
    caret_line = f"  {' ' * (col(loc, instring) - 1)}^"
    print("\n".join((headline, source_line, caret_line)))

361 

362 

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default "match succeeded" debug action.

    Prints the expression and ``toks.as_list()``, prefixed with ``*`` when
    ``cache_hit`` is true. ``instring``, ``startloc`` and ``endloc`` are unused.
    """
    marker = "*" if cache_hit else ""
    report = f"{marker}Matched {expr} -> {toks.as_list()}"
    print(report)

373 

374 

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default "match failed" debug action.

    Prints the expression together with the type name and text of ``exc``,
    prefixed with ``*`` when ``cache_hit`` is true. ``instring`` and ``loc``
    are unused.
    """
    marker = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print(f"{marker}Match {expr} failed, {exc_name} raised: {exc}")

384 

385 

def null_debug_action(*args):
    """No-op debug action; accepts any arguments and suppresses debugging output during parsing."""
    return None

388 

389 

390class ParserElement(ABC): 

391 """Abstract base level parser element class.""" 

392 

393 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

394 verbose_stacktrace: bool = False 

395 _literalStringClass: type = None # type: ignore[assignment] 

396 

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of whitespace characters.

    Stores ``chars`` as ``ParserElement.DEFAULT_WHITE_CHARS`` and also updates
    the built-in expressions of this module that follow the default.

    Example::

        # default whitespace chars are space, <TAB> and newline
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # refresh every module-level expression that tracks the default whitespace
    tracking_default = (e for e in _builtin_exprs if e.copyDefaultWhiteChars)
    for builtin_expr in tracking_default:
        builtin_expr.whiteChars = set(chars)

417 

@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class that wraps string literals included in a parser.

    The class is stored as ``ParserElement._literalStringClass``.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class

439 

@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Generate ``cls(obj, **class_kwargs)`` for each obj in seq.

    Example::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    """
    for item in seq:
        yield cls(item, **class_kwargs)

451 

class DebugActions(NamedTuple):
    # Three optional debug callbacks; any of them may be None.
    # called before a match is attempted
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match
    debug_match: typing.Optional[DebugSuccessAction]
    # called when parsing raised an exception
    debug_fail: typing.Optional[DebugExceptionAction]

456 

def __init__(self, savelist: bool = False):
    """Set every per-expression attribute to its default; ``savelist`` seeds ``saveAsList``."""
    self.parseAction: list[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    self.saveAsList = savelist
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    # consulted when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs: list[ParserElement] = []
    self.debug = False
    self.streamlined = False
    # lets subclasses that don't advance the parse index skip IndexError handling
    self.mayIndexError = True
    self.errmsg: Union[str, None] = ""
    # results names are modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions, none installed initially
    self.debugActions = self.DebugActions(None, None, None)
    # used to avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
    self.suppress_warnings_: list[Diagnostics] = []
    self.show_in_diagram = True

485 

def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
    """
    Add ``warning_type`` to this expression's list of suppressed diagnostics
    and return the expression itself.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parse_string("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self

501 

def visit_all(self):
    """Generator over this expression and all of its sub-expressions.

    Expressions are visited breadth-first, each one only once. Typically just
    for internal use.
    """
    visited = set()
    pending = deque((self,))
    while pending:
        node = pending.popleft()

        # recursive grammars can lead back to an expression already visited
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node

518 

def copy(self) -> ParserElement:
    """
    Return a copy of this :class:`ParserElement`. Copies make it possible to
    attach different parse actions to the same parsing pattern.

    The copy is shallow, except that it gets its own ``parseAction`` and
    ``ignoreExprs`` lists. If the expression follows the default whitespace
    characters, the copy picks up the current default.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # give the copy independent lists so later additions don't affect the original
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate

547 

def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> ParserElement:
    """
    Assign a name under which the matched tokens can be referenced as a
    nested attribute of the returned parse results.

    Results names normally behave like dict keys: a later value replaces an
    earlier one. To keep every value captured for a results name, call
    ``set_results_name`` with ``list_all_matches`` = True
    (``listAllMatches`` is accepted as a keyword alias).

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
    this lets a basic element, such as an integer, be defined once and
    referenced in multiple places under different names.

    Results names can also be set with the abbreviated syntax
    ``expr("name")`` in place of ``expr.set_results_name("name")``
    - see :class:`__call__`. If ``list_all_matches`` is required, use
    ``expr("name*")``.

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    collect_all = listAllMatches or list_all_matches
    return self._setResultsName(name, collect_all)

581 

def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """
    Return a copy of this expression carrying ``name`` as its results name.

    A ``name`` of None returns ``self`` unchanged. A trailing ``*`` on
    ``name`` is stripped and forces ``list_all_matches``. The copy's
    ``modalResults`` is the negation of ``list_all_matches``.
    """
    if name is None:
        return self
    named_copy = self.copy()
    if name.endswith("*"):
        name, list_all_matches = name[:-1], True
    named_copy.resultsName = name
    named_copy.modalResults = not list_all_matches
    return named_copy

592 

def set_break(self, break_flag: bool = True) -> ParserElement:
    """
    Invoke the Python debugger (via ``breakpoint()``) when this element is
    about to be parsed. Pass ``break_flag`` as ``True`` to enable, ``False`` to
    disable.

    Enabling replaces ``self._parse`` with a function that first calls
    ``breakpoint()``; disabling restores the parse method saved by it.
    """
    if not break_flag:
        # only undo a wrapper that this method installed earlier
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod  # type: ignore [method-assign]
        return self

    wrapped_parse = self._parse

    def breaker(instring, loc, do_actions=True, callPreParse=True):
        # this call to breakpoint() is intentional, not a checkin error
        breakpoint()
        return wrapped_parse(instring, loc, do_actions, callPreParse)

    breaker._originalParseMethod = wrapped_parse  # type: ignore [attr-defined]
    self._parse = breaker  # type: ignore [method-assign]
    return self

612 

def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Replace this expression's parse actions with the given one(s), to be run
    when the parse element definition matches successfully.

    Parse actions can perform data conversions, do extra validation, update
    external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable taking 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens arrive as ParseResults. They can be changed in place with
    list-style append, extend, and pop operations for the parsed list elements,
    and with dictionary-style item set and del operations to add, update, or
    remove named results. Tokens modified in place do not need to be returned
    with a return statement.

    A parse action may also replace the given tokens entirely, either with another
    ``ParseResults`` object or with some completely different object (common for
    parse actions that perform data conversions). A convenient way to build a new
    parse result is to define the values using a dict and create the return value
    with :class:`ParseResults.from_dict`.

    Passing None as the only ``fn`` clears all parse actions previously added to
    this expression.

    Optional keyword arguments:

    - ``call_during_try`` = (default= ``False``) whether the parse action should also run
      during lookaheads and alternate testing. Parse actions with side effects
      should only be called once it is known that they are part of a successful
      parse. Parse actions that perform additional validation should pass
      call_during_try as True, so that the validation code takes part in the
      preliminary "try" parses.

    Raises ``TypeError`` if any given parse action is not callable.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None means "remove all parse actions"
    if list(fns) == [None]:
        self.parseAction.clear()
        return self

    for candidate in fns:
        if not callable(candidate):
            raise TypeError("parse actions must be callable")

    trimmed_actions = [_trim_arity(fn) for fn in fns]
    self.parseAction[:] = trimmed_actions

    # the snake_case keyword takes precedence over the legacy camelCase one
    legacy_flag = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_flag)

    return self

700 

def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Append one or more parse actions to this expression's existing list of
    parse actions. See :class:`set_parse_action`.

    ``call_during_try`` (or ``callDuringTry``) can only switch the
    expression's call-during-try setting on, never off.

    See examples in :class:`copy`.
    """
    new_actions = [_trim_arity(fn) for fn in fns]
    self.parseAction += new_actions

    legacy_flag = kwargs.get("callDuringTry", False)
    requested = kwargs.get("call_during_try", legacy_flag)
    self.callDuringTry = self.callDuringTry or requested
    return self

712 

def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement:
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception; when
      omitted (or None), the default message of :class:`condition_as_parse_action`
      is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Returns this expression, so calls can be chained.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    # Only stringify a message that was actually supplied: str(None) would become
    # the literal text "None" and hide condition_as_parse_action's default message.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    # like add_parse_action, this can only switch call-during-try on, never off
    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self

749 

def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
    """
    Set the action to run when parsing fails at this expression.
    The fail action ``fn`` is a callable that takes the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    ``fn`` returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns this expression, so calls can be chained."""
    self.failAction = fn
    return self

765 

def _skipIgnorables(self, instring: str, loc: int) -> int:
    """
    Advance ``loc`` past any text matched by this expression's ignore
    expressions and return the new location.

    Each ignore expression is applied repeatedly until it raises
    ParseException. The whole set is retried for as long as some
    expression matched and the location moved.
    """
    if not self.ignoreExprs:
        return loc

    parse_fns = [ignorable._parse for ignorable in self.ignoreExprs]
    prev_loc = loc
    matched_any = True
    while matched_any:
        matched_any = False
        for parse_fn in parse_fns:
            try:
                while True:
                    loc, _ = parse_fn(instring, loc)
                    matched_any = True
            except ParseException:
                pass
        # stop if the ignore exprs matched without actually advancing the parse location
        if loc == prev_loc:
            break
        prev_loc = loc
    return loc

786 

def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which actual parsing should start.

    First skips text matched by the ignore expressions (if any), then, when
    ``skipWhitespace`` is set, skips characters contained in ``whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    end = len(instring)
    skippable = self.whiteChars
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc

798 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Base implementation: consume nothing and return ``loc`` with an empty token list."""
    no_tokens = []
    return loc, no_tokens

801 

def postParse(self, instring, loc, tokenlist):
    """Base implementation: hand back ``tokenlist`` unchanged."""
    return tokenlist

804 

805 # @profile 

def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Parse ``instring`` at ``loc`` with this expression, without any caching.

    Steps: optionally call ``preParse``, call ``parseImpl`` and ``postParse``,
    wrap the tokens in a :class:`ParseResults`, then run the parse actions
    (only when ``do_actions`` or ``self.callDuringTry`` is set). A parse action
    that returns something other than None or the tokens it was given replaces
    the results.

    When ``self.debug`` is set, the configured debug actions are called for
    try, fail and match. ``self.failAction``, if set, is called when
    ``preParse``/``parseImpl`` raise.

    Returns ``(new_loc, results)``. An IndexError from ``parseImpl`` is
    converted to a ParseException at the end of the string; an IndexError
    from a parse action is re-raised as a ParseException chained to it.
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # Bind tokens_start up front: if preParse itself raises, the except
        # handler below would otherwise fail with UnboundLocalError and mask
        # the real exception.
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # fast path: no debugging and no fail action, so no outer try/except
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a new return value replaces the current results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a new return value replaces the current results
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

906 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the resulting location.

    A :class:`ParseFatalException` is re-raised as-is when ``raise_fatal``
    is set; otherwise it is replaced by a plain :class:`ParseException`
    built from this element's ``errmsg``.
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return outcome[0]

921 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``.

    Returns ``False`` when ``try_parse`` raises :class:`ParseException` or
    ``IndexError``, and ``True`` when it completes normally.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True

929 

# Memo table (and its lock) for left-recursion in Forward references.
# reset_cache() empties the table; enable_left_recursion() replaces it with
# a bounded or unbounded memo object.
# NOTE(review): recursion_lock is presumably held by Forward while it reads
# or writes recursion_memos - confirm against the Forward implementation.
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}

935 

class _CacheType(typing.Protocol):
    """
    Structural interface expected of the objects used for packrat and
    left-recursion caching of results and exceptions.
    """

    # sentinel that ``get`` hands back for a key with no cached entry
    not_in_cache: bool

    def get(self, *args) -> typing.Any:
        """Look up a cached value."""
        ...

    def set(self, *args) -> None:
        """Store a value in the cache."""
        ...

    def clear(self) -> None:
        """Drop every cached entry."""
        ...

949 

class NullCache(dict):
    """
    Do-nothing cache used as the initial value of the ``packrat_cache``
    class variable. Once ``enable_packrat()`` is called, it is replaced by
    a real cache object.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        """Ignore the arguments; nothing is ever looked up."""
        return None

    def set(self, *args) -> None:
        """Ignore the arguments; nothing is ever stored."""
        return None

    def clear(self) -> None:
        """No-op (the inherited ``dict.clear`` is deliberately shadowed)."""
        return None

964 

# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions; starts out as a NullCache and is replaced
# by enable_packrat()
packrat_cache: _CacheType = NullCache()
# held by _parseCache for the whole lookup / parse / store sequence
packrat_cache_lock = RLock()
# [hits, misses], indexed by the HIT / MISS constants in _parseCache
packrat_cache_stats = [0, 0]

970 

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Packrat (memoizing) front end to :meth:`_parseNoCache`.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``.
    On a miss the expression is parsed and either the result tuple
    ``(end_loc, tokens_copy, start_loc)`` or a traceback-free copy of the
    raised exception is stored. On a hit the stored exception is re-raised,
    or a fresh copy of the stored tokens is returned.

    Debug callbacks are invoked on cache hits with ``cache_hit=True``; a
    ``TypeError`` from a callback is ignored (presumably to tolerate
    callbacks that predate the ``cache_hit`` argument - TODO confirm).

    :returns: ``(new_loc, results)``
    :raises ParseBaseException: whatever the underlying parse raised
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # stored layout: (end location, tokens, start location)
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value

        ParserElement.packrat_cache_stats[HIT] += 1
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass
        if isinstance(value, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise value

        value = cast(tuple[int, ParseResults, int], value)
        end_loc, result, start_loc = value[0], value[1].copy(), value[2]
        if self.debug and self.debugActions.debug_match:
            try:
                # pass (start, end) in the same order _parseNoCache does
                self.debugActions.debug_match(
                    instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return end_loc, result

1020 

# default parse entry point: no memoization. enable_packrat() rebinds this to
# _parseCache and disable_memoization() restores _parseNoCache.
_parse = _parseNoCache

1022 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters in place, and
    empty the left-recursion memo table.
    """
    stats = ParserElement.packrat_cache_stats
    ParserElement.packrat_cache.clear()
    # slice-assign so existing references to the stats list stay valid
    stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()

1030 

# memoization mode flags; set by enable_packrat() / enable_left_recursion()
# and cleared by disable_memoization(). The two modes are mutually exclusive.
_packratEnabled = False
_left_recursion_enabled = False

1033 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing and discard their memoized
    state.

    Calling this when neither mode is active is harmless, so it can be
    used to clear any previous settings before enabling Packrat or Left
    Recursion.
    """
    ParserElement.reset_cache()
    # back to the plain, uncached parse entry point
    ParserElement._parse = ParserElement._parseNoCache
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False

1047 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Switch on "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, a left-recursive
    :class:`Forward` is matched repeatedly with a fixed recursion depth
    that grows step by step until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-evaluated during backtracking. Programs whose
    parse actions depend on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - upper bound on the number
      of ``Forward`` elements memoized during matching; ``None`` memoizes
      all of them.

    Bounded Recursion parsing is similar to, but not the same as, Packrat
    parsing, and the two cannot be combined. Pass ``force=True`` to discard
    any previous, conflicting settings.

    :raises RuntimeError: if Packrat is enabled and ``force`` is not set
    :raises NotImplementedError: if ``cache_size_limit`` is not positive
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo_table = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo_table = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError(f"Memo size of {cache_size_limit}")

    ParserElement.recursion_memos = memo_table  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True

1095 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Switch on "packrat" parsing, which memoizes parse attempts. When the
    same expression is tried again at the same string location (common in
    complex grammars), the cached outcome is returned instead of running
    the parsing/validating code again. Both successful results and parsing
    exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the
      size of the packrat cache; ``None`` makes the cache unbounded; ``0``
      effectively disables the cache.

    Because parse actions with side-effects may behave differently once
    results are cached, packrat parsing is off when pyparsing is first
    imported. A program opts in by calling
    :class:`ParserElement.enable_packrat`, ideally immediately after
    importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar to, but not the same as, Bounded Recursion
    parsing, and the two cannot be combined. Pass ``force=True`` to discard
    any previous, conflicting settings.

    :raises RuntimeError: if Bounded Recursion is enabled and ``force`` is
        not set
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    # already active: keep the existing cache
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    ParserElement.packrat_cache = (
        _UnboundedCache() if cache_size_limit is None else _FifoCache(cache_size_limit)
    )
    ParserElement._parse = ParserElement._parseCache

1144 

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` from its beginning using this parser definition.
    This is the primary entry point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    By default a match of only the leading part of the input is accepted.
    Set ``parse_all`` to ``True`` to require that the whole input matches;
    this is equivalent to ending the grammar with a :class:`StringEnd`
    element.

    Unless tabs are being kept (see :class:`parse_with_tabs`), parsing runs
    on a copy of the input in which tabs are expanded to spaces (8 per tab,
    the ``str.expandtabs`` default) so that column numbers are reported
    correctly. If the input contains tabs and parse actions index into the
    string with their ``loc`` argument, keep the views consistent by doing
    one of:

    - calling ``parse_with_tabs`` on the grammar before ``parse_string``,
    - writing parse actions with the full ``(s,loc,toks)`` signature and
      indexing into their ``s`` argument, or
    - expanding the tabs in the input before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior depends on the concrete subclass; see the
    individual classes for more examples.

    With ``parse_all`` set, input that does not match the whole grammar
    raises an exception.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b'  (at char 5), (line:1, col:6)
    """
    must_consume_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    text = instring if self.keepTabs else instring.expandtabs()
    try:
        end_loc, tokens = self._parse(text, 0)
        if must_consume_all:
            # skip trailing whitespace/ignorables, then insist on end of text
            end_loc = self.preParse(text, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(text, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)

    return tokens

1215 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches, yielding for each match
    the matching tokens, start location, and end location.

    :param instring: the string to scan
    :param max_matches: stop after this many matches have been yielded
    :param overlap: if set, overlapping matches are reported
    :param always_skip_whitespace: if set, skip ahead between attempts
        using a throwaway ``Empty`` that shares this element's ignore
        expressions and whitespace characters; otherwise use this
        element's own ``preParse``
    :param debug: if set, print each match as a dict of tokens/start/end
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of
        the two values is used
    :raises ParseBaseException: non-``ParseException`` parse errors
        propagate (with the internal traceback stripped unless
        ``ParserElement.verbose_stacktrace`` is set)

    Note that the start and end locations are reported relative to the
    string being parsed. See :class:`parse_string` for more information on
    parsing strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    # same PEP8 spelling that parse_string uses
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here - retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): when skippable text precedes the
                        # match, scanning resumes at the END of the match
                        # (nextLoc), i.e. without overlap - confirm this is
                        # intended before changing it.
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # zero-width match: step forward to guarantee progress
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1313 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Build on :class:`scan_string` to rewrite matched text using the tokens
    produced by a parse action. Define a grammar, attach a parse action
    that modifies the returned token list, and call ``transform_string()``
    on the target string: each match is replaced by its (modified) tokens
    and the resulting string is returned.

    Note that this sets ``keepTabs`` on the element, and the setting stays
    in effect after the call.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    cursor = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy through the unmatched text that precedes this match
            if start > cursor:
                pieces.append(instring[cursor:start])
            cursor = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces.extend(toks.as_list())
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[cursor:])
        non_empty = [piece for piece in pieces if piece]
        return "".join([str(item) for item in _flatten(non_empty)])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1364 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Convenience wrapper around :class:`scan_string` that collects just the
    tokens of every match into a single :class:`ParseResults`. The optional
    ``max_matches`` argument stops the search after 'n' matches
    (``maxMatches`` is the pre-PEP8 spelling; the smaller value wins).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        scanner = self.scan_string(
            instring, limit, always_skip_whitespace=False, debug=debug
        )
        # each scan result is (tokens, start, end); keep only the tokens
        found = [match[0] for match in scanner]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1409 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits ``instring``, treating every match of this
    expression as a separator. ``maxsplit`` limits the number of splits,
    and ``include_separators`` (default= ``False``) also yields the first
    token of each separator match between the pieces
    (``includeSeparators`` is the pre-PEP8 spelling).

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator
    yield instring[cursor:]

1441 

def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. A string
    operand is first converted with this element's literal string class
    (:class:`Literal` by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    The skipped text is returned under the results name '_skipped'; so that
    several skips can appear in one parser, the value is a list of all
    skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1477 

def __radd__(self, other) -> ParserElement:
    """
    Reflected ``+``: used when the left operand is not a
    :class:`ParserElement`. A leading ``...`` becomes a :class:`SkipTo`
    this element, with the skipped text named ``_skipped``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1490 

def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator: like ``+``, but with an
    :class:`And` error stop inserted between the two operands.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1500 

def __rsub__(self, other) -> ParserElement:
    """
    Reflected ``-``: used when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1510 

def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, so that ``expr * 3`` can be written
    in place of ``expr + expr + expr``. The multiplier may also be a
    2-integer tuple, similar to ``{min, max}`` multipliers in regular
    expressions. Tuples may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``expr*(None, n)`` does not raise an exception when more than n exprs
    exist in the input stream; that is, it does not enforce a maximum
    number of occurrences. If that is wanted, write
    ``expr*(None, n) + ~expr``

    :raises ValueError: for a negative count, or a tuple whose second
        value is smaller than its first
    """
    # normalize the Ellipsis spellings to tuples first
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        required, optional = other, 0
    else:
        bounds = tuple(None if item is Ellipsis else item for item in other)
        # pad to (low, high), then default a missing low to 0
        bounds = (bounds + (None, None))[:2]
        if bounds[0] is None:
            bounds = (0, bounds[1])
        low, high = bounds

        if isinstance(low, int) and high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)

        if not (isinstance(low, int) and isinstance(high, int)):
            return NotImplemented

        required, optional = low, high - low

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    if not optional:
        return self if required == 1 else And([self] * required)

    # build Opt(self + Opt(self + ... Opt(self))) from the inside out
    optional_tail = Opt(self)
    for _ in range(optional - 1):
        optional_tail = Opt(self + optional_tail)

    if not required:
        return optional_tail
    if required == 1:
        return self + optional_tail
    return And([self] * required) + optional_tail

1590 

def __rmul__(self, other) -> ParserElement:
    """Reflected ``*``: delegates to ``__mul__`` with the same operand."""
    product = self.__mul__(other)
    return product

1593 

def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` yields a pending skip that must skip, and ``expr | ""``
    yields ``Opt(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    alternative = other
    if isinstance(alternative, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if alternative == "":
            return Opt(self)
        alternative = self._literalStringClass(alternative)

    if isinstance(alternative, ParserElement):
        return MatchFirst([self, alternative])
    return NotImplemented

1609 

def __ror__(self, other) -> ParserElement:
    """
    Reflected ``|``: used when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1619 

def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`. A string
    operand is first converted with this element's literal string class.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1629 

def __rxor__(self, other) -> ParserElement:
    """
    Reflected ``^``: used when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1639 

def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`. A string
    operand is first converted with this element's literal string class.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1649 

def __rand__(self, other) -> ParserElement:
    """
    Reflected ``&``: used when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1659 

def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - wraps this element in a
    :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1665 

# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence (with __iter__ set to None, iter(expr) raises
# TypeError instead of falling back to repeated __getitem__ calls)
__iter__ = None

1669 

def __getitem__(self, key):
    """
    ``[]`` indexing as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception when more
    than ``n`` ``expr`` matches exist in the input stream. If that is
    wanted, write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    :raises TypeError: if more than two index arguments are given
    """
    has_stop = False
    stop_on = NoMatch()

    # peel a ``start:stop`` slice apart into the repetition key and stop_on
    if isinstance(key, slice):
        key, stop_on = key.start, key.stop
        if key is None:
            key = ...
        has_stop = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        key, stop_on = (key[0], key[1].start), key[1].stop
        has_stop = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # a lone non-iterable n means exactly n: (n, n)
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop:
        repeated.stopOn(stop_on)

    return repeated

1729 

def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    A ``name`` ending in ``'*'`` causes ``list_all_matches`` to be passed
    as ``True``.

    With no ``name``, this is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)

1749 

1750 def suppress(self) -> ParserElement: 

1751 """ 

1752 Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from 

1753 cluttering up returned output. 

1754 """ 

1755 return Suppress(self) 

1756 

1757 def ignore_whitespace(self, recursive: bool = True) -> ParserElement: 

1758 """ 

1759 Enables the skipping of whitespace before matching the characters in the 

1760 :class:`ParserElement`'s defined pattern. 

1761 

1762 :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any) 

1763 """ 

1764 self.skipWhitespace = True 

1765 return self 

1766 

1767 def leave_whitespace(self, recursive: bool = True) -> ParserElement: 

1768 """ 

1769 Disables the skipping of whitespace before matching the characters in the 

1770 :class:`ParserElement`'s defined pattern. This is normally only used internally by 

1771 the pyparsing module, but may be needed in some whitespace-sensitive grammars. 

1772 

1773 :param recursive: If true (the default), also disable whitespace skipping in child elements (if any) 

1774 """ 

1775 self.skipWhitespace = False 

1776 return self 

1777 

1778 def set_whitespace_chars( 

1779 self, chars: Union[set[str], str], copy_defaults: bool = False 

1780 ) -> ParserElement: 

1781 """ 

1782 Overrides the default whitespace chars 

1783 """ 

1784 self.skipWhitespace = True 

1785 self.whiteChars = set(chars) 

1786 self.copyDefaultWhiteChars = copy_defaults 

1787 return self 

1788 

1789 def parse_with_tabs(self) -> ParserElement: 

1790 """ 

1791 Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string. 

1792 Must be called before ``parse_string`` when the input grammar contains elements that 

1793 match ``<TAB>`` characters. 

1794 """ 

1795 self.keepTabs = True 

1796 return self 

1797 

1798 def ignore(self, other: ParserElement) -> ParserElement: 

1799 """ 

1800 Define expression to be ignored (e.g., comments) while doing pattern 

1801 matching; may be called repeatedly, to define multiple comment or other 

1802 ignorable patterns. 

1803 

1804 Example:: 

1805 

1806 patt = Word(alphas)[...] 

1807 patt.parse_string('ablaj /* comment */ lskjd') 

1808 # -> ['ablaj'] 

1809 

1810 patt.ignore(c_style_comment) 

1811 patt.parse_string('ablaj /* comment */ lskjd') 

1812 # -> ['ablaj', 'lskjd'] 

1813 """ 

1814 if isinstance(other, str_type): 

1815 other = Suppress(other) 

1816 

1817 if isinstance(other, Suppress): 

1818 if other not in self.ignoreExprs: 

1819 self.ignoreExprs.append(other) 

1820 else: 

1821 self.ignoreExprs.append(Suppress(other.copy())) 

1822 return self 

1823 

1824 def set_debug_actions( 

1825 self, 

1826 start_action: DebugStartAction, 

1827 success_action: DebugSuccessAction, 

1828 exception_action: DebugExceptionAction, 

1829 ) -> ParserElement: 

1830 """ 

1831 Customize display of debugging messages while doing pattern matching: 

1832 

1833 - ``start_action`` - method to be called when an expression is about to be parsed; 

1834 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)`` 

1835 

1836 - ``success_action`` - method to be called when an expression has successfully parsed; 

1837 should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)`` 

1838 

1839 - ``exception_action`` - method to be called when expression fails to parse; 

1840 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)`` 

1841 """ 

1842 self.debugActions = self.DebugActions( 

1843 start_action or _default_start_debug_action, # type: ignore[truthy-function] 

1844 success_action or _default_success_debug_action, # type: ignore[truthy-function] 

1845 exception_action or _default_exception_debug_action, # type: ignore[truthy-function] 

1846 ) 

1847 self.debug = True 

1848 return self 

1849 

1850 def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement: 

1851 """ 

1852 Enable display of debugging messages while doing pattern matching. 

1853 Set ``flag`` to ``True`` to enable, ``False`` to disable. 

1854 Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions. 

1855 

1856 Example:: 

1857 

1858 wd = Word(alphas).set_name("alphaword") 

1859 integer = Word(nums).set_name("numword") 

1860 term = wd | integer 

1861 

1862 # turn on debugging for wd 

1863 wd.set_debug() 

1864 

1865 term[1, ...].parse_string("abc 123 xyz 890") 

1866 

1867 prints:: 

1868 

1869 Match alphaword at loc 0(1,1) 

1870 Matched alphaword -> ['abc'] 

1871 Match alphaword at loc 3(1,4) 

1872 Exception raised:Expected alphaword (at char 4), (line:1, col:5) 

1873 Match alphaword at loc 7(1,8) 

1874 Matched alphaword -> ['xyz'] 

1875 Match alphaword at loc 11(1,12) 

1876 Exception raised:Expected alphaword (at char 12), (line:1, col:13) 

1877 Match alphaword at loc 15(1,16) 

1878 Exception raised:Expected alphaword (at char 15), (line:1, col:16) 

1879 

1880 The output shown is that produced by the default debug actions - custom debug actions can be 

1881 specified using :class:`set_debug_actions`. Prior to attempting 

1882 to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"`` 

1883 is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"`` 

1884 message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression, 

1885 which makes debugging and exception messages easier to understand - for instance, the default 

1886 name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``. 

1887 """ 

1888 if recurse: 

1889 for expr in self.visit_all(): 

1890 expr.set_debug(flag, recurse=False) 

1891 return self 

1892 

1893 if flag: 

1894 self.set_debug_actions( 

1895 _default_start_debug_action, 

1896 _default_success_debug_action, 

1897 _default_exception_debug_action, 

1898 ) 

1899 else: 

1900 self.debug = False 

1901 return self 

1902 

1903 @property 

1904 def default_name(self) -> str: 

1905 if self._defaultName is None: 

1906 self._defaultName = self._generateDefaultName() 

1907 return self._defaultName 

1908 

    @abstractmethod
    def _generateDefaultName(self) -> str:
        """
        Child classes must define this method, which defines how the ``default_name`` is set.

        The returned string is cached by the ``default_name`` property, so it is only
        computed again after the cached value has been reset to ``None``.
        """

1914 

1915 def set_name(self, name: typing.Optional[str]) -> ParserElement: 

1916 """ 

1917 Define name for this expression, makes debugging and exception messages clearer. If 

1918 `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also 

1919 enable debug for this expression. 

1920 

1921 If `name` is None, clears any custom name for this expression, and clears the 

1922 debug flag is it was enabled via `__diag__.enable_debug_on_named_expressions`. 

1923 

1924 Example:: 

1925 

1926 integer = Word(nums) 

1927 integer.parse_string("ABC") # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1) 

1928 

1929 integer.set_name("integer") 

1930 integer.parse_string("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) 

1931 """ 

1932 self.customName = name # type: ignore[assignment] 

1933 self.errmsg = f"Expected {str(self)}" 

1934 

1935 if __diag__.enable_debug_on_named_expressions: 

1936 self.set_debug(name is not None) 

1937 

1938 return self 

1939 

1940 @property 

1941 def name(self) -> str: 

1942 # This will use a user-defined name if available, but otherwise defaults back to the auto-generated name 

1943 return self.customName if self.customName is not None else self.default_name 

1944 

1945 @name.setter 

1946 def name(self, new_name) -> None: 

1947 self.set_name(new_name) 

1948 

    def __str__(self) -> str:
        """Return the expression's ``name`` (custom name if set, else the default name)."""
        return self.name

1951 

    def __repr__(self) -> str:
        """Return the same text as ``str(self)``."""
        return str(self)

1954 

1955 def streamline(self) -> ParserElement: 

1956 self.streamlined = True 

1957 self._defaultName = None 

1958 return self 

1959 

    def recurse(self) -> list[ParserElement]:
        """
        Return the elements that ``_checkRecursion`` should descend into; this
        implementation returns an empty list.
        """
        return []

1962 

1963 def _checkRecursion(self, parseElementList): 

1964 subRecCheckList = parseElementList[:] + [self] 

1965 for e in self.recurse(): 

1966 e._checkRecursion(subRecCheckList) 

1967 

1968 def validate(self, validateTrace=None) -> None: 

1969 """ 

1970 Check defined expressions for valid structure, check for infinite recursive definitions. 

1971 """ 

1972 warnings.warn( 

1973 "ParserElement.validate() is deprecated, and should not be used to check for left recursion", 

1974 DeprecationWarning, 

1975 stacklevel=2, 

1976 ) 

1977 self._checkRecursion([]) 

1978 

1979 def parse_file( 

1980 self, 

1981 file_or_filename: Union[str, Path, TextIO], 

1982 encoding: str = "utf-8", 

1983 parse_all: bool = False, 

1984 *, 

1985 parseAll: bool = False, 

1986 ) -> ParseResults: 

1987 """ 

1988 Execute the parse expression on the given file or filename. 

1989 If a filename is specified (instead of a file object), 

1990 the entire file is opened, read, and closed before parsing. 

1991 """ 

1992 parseAll = parseAll or parse_all 

1993 try: 

1994 file_or_filename = typing.cast(TextIO, file_or_filename) 

1995 file_contents = file_or_filename.read() 

1996 except AttributeError: 

1997 file_or_filename = typing.cast(str, file_or_filename) 

1998 with open(file_or_filename, "r", encoding=encoding) as f: 

1999 file_contents = f.read() 

2000 try: 

2001 return self.parse_string(file_contents, parseAll) 

2002 except ParseBaseException as exc: 

2003 if ParserElement.verbose_stacktrace: 

2004 raise 

2005 

2006 # catch and re-raise exception from here, clears out pyparsing internal stack trace 

2007 raise exc.with_traceback(None) 

2008 

2009 def __eq__(self, other): 

2010 if self is other: 

2011 return True 

2012 elif isinstance(other, str_type): 

2013 return self.matches(other, parse_all=True) 

2014 elif isinstance(other, ParserElement): 

2015 return vars(self) == vars(other) 

2016 return False 

2017 

    def __hash__(self):
        """Hash by object identity (``id(self)``)."""
        return id(self)

2020 

2021 def matches( 

2022 self, test_string: str, parse_all: bool = True, *, parseAll: bool = True 

2023 ) -> bool: 

2024 """ 

2025 Method for quick testing of a parser against a test string. Good for simple 

2026 inline microtests of sub expressions while building up larger parser. 

2027 

2028 Parameters: 

2029 

2030 - ``test_string`` - to test against this expression for a match 

2031 - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests 

2032 

2033 Example:: 

2034 

2035 expr = Word(nums) 

2036 assert expr.matches("100") 

2037 """ 

2038 parseAll = parseAll and parse_all 

2039 try: 

2040 self.parse_string(str(test_string), parse_all=parseAll) 

2041 return True 

2042 except ParseBaseException: 

2043 return False 

2044 

2045 def run_tests( 

2046 self, 

2047 tests: Union[str, list[str]], 

2048 parse_all: bool = True, 

2049 comment: typing.Optional[Union[ParserElement, str]] = "#", 

2050 full_dump: bool = True, 

2051 print_results: bool = True, 

2052 failure_tests: bool = False, 

2053 post_parse: typing.Optional[ 

2054 Callable[[str, ParseResults], typing.Optional[str]] 

2055 ] = None, 

2056 file: typing.Optional[TextIO] = None, 

2057 with_line_numbers: bool = False, 

2058 *, 

2059 parseAll: bool = True, 

2060 fullDump: bool = True, 

2061 printResults: bool = True, 

2062 failureTests: bool = False, 

2063 postParse: typing.Optional[ 

2064 Callable[[str, ParseResults], typing.Optional[str]] 

2065 ] = None, 

2066 ) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]: 

2067 """ 

2068 Execute the parse expression on a series of test strings, showing each 

2069 test, the parsed results or where the parse failed. Quick and easy way to 

2070 run a parse expression against a list of sample strings. 

2071 

2072 Parameters: 

2073 

2074 - ``tests`` - a list of separate test strings, or a multiline string of test strings 

2075 - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests 

2076 - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test 

2077 string; pass None to disable comment filtering 

2078 - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline; 

2079 if False, only dump nested list 

2080 - ``print_results`` - (default= ``True``) prints test output to stdout 

2081 - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing 

2082 - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as 

2083 `fn(test_string, parse_results)` and returns a string to be added to the test output 

2084 - ``file`` - (default= ``None``) optional file-like object to which test output will be written; 

2085 if None, will default to ``sys.stdout`` 

2086 - ``with_line_numbers`` - default= ``False``) show test strings with line and column numbers 

2087 

2088 Returns: a (success, results) tuple, where success indicates that all tests succeeded 

2089 (or failed if ``failure_tests`` is True), and the results contain a list of lines of each 

2090 test's output 

2091 

2092 Example:: 

2093 

2094 number_expr = pyparsing_common.number.copy() 

2095 

2096 result = number_expr.run_tests(''' 

2097 # unsigned integer 

2098 100 

2099 # negative integer 

2100 -100 

2101 # float with scientific notation 

2102 6.02e23 

2103 # integer with scientific notation 

2104 1e-12 

2105 ''') 

2106 print("Success" if result[0] else "Failed!") 

2107 

2108 result = number_expr.run_tests(''' 

2109 # stray character 

2110 100Z 

2111 # missing leading digit before '.' 

2112 -.100 

2113 # too many '.' 

2114 3.14.159 

2115 ''', failure_tests=True) 

2116 print("Success" if result[0] else "Failed!") 

2117 

2118 prints:: 

2119 

2120 # unsigned integer 

2121 100 

2122 [100] 

2123 

2124 # negative integer 

2125 -100 

2126 [-100] 

2127 

2128 # float with scientific notation 

2129 6.02e23 

2130 [6.02e+23] 

2131 

2132 # integer with scientific notation 

2133 1e-12 

2134 [1e-12] 

2135 

2136 Success 

2137 

2138 # stray character 

2139 100Z 

2140 ^ 

2141 FAIL: Expected end of text (at char 3), (line:1, col:4) 

2142 

2143 # missing leading digit before '.' 

2144 -.100 

2145 ^ 

2146 FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) 

2147 

2148 # too many '.' 

2149 3.14.159 

2150 ^ 

2151 FAIL: Expected end of text (at char 4), (line:1, col:5) 

2152 

2153 Success 

2154 

2155 Each test string must be on a single line. If you want to test a string that spans multiple 

2156 lines, create a test like this:: 

2157 

2158 expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines") 

2159 

2160 (Note that this is a raw string literal, you must include the leading ``'r'``.) 

2161 """ 

2162 from .testing import pyparsing_test 

2163 

2164 parseAll = parseAll and parse_all 

2165 fullDump = fullDump and full_dump 

2166 printResults = printResults and print_results 

2167 failureTests = failureTests or failure_tests 

2168 postParse = postParse or post_parse 

2169 if isinstance(tests, str_type): 

2170 tests = typing.cast(str, tests) 

2171 line_strip = type(tests).strip 

2172 tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()] 

2173 comment_specified = comment is not None 

2174 if comment_specified: 

2175 if isinstance(comment, str_type): 

2176 comment = typing.cast(str, comment) 

2177 comment = Literal(comment) 

2178 comment = typing.cast(ParserElement, comment) 

2179 if file is None: 

2180 file = sys.stdout 

2181 print_ = file.write 

2182 

2183 result: Union[ParseResults, Exception] 

2184 allResults: list[tuple[str, Union[ParseResults, Exception]]] = [] 

2185 comments: list[str] = [] 

2186 success = True 

2187 NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string) 

2188 BOM = "\ufeff" 

2189 nlstr = "\n" 

2190 for t in tests: 

2191 if comment_specified and comment.matches(t, False) or comments and not t: 

2192 comments.append( 

2193 pyparsing_test.with_line_numbers(t) if with_line_numbers else t 

2194 ) 

2195 continue 

2196 if not t: 

2197 continue 

2198 out = [ 

2199 f"{nlstr}{nlstr.join(comments) if comments else ''}", 

2200 pyparsing_test.with_line_numbers(t) if with_line_numbers else t, 

2201 ] 

2202 comments.clear() 

2203 try: 

2204 # convert newline marks to actual newlines, and strip leading BOM if present 

2205 t = NL.transform_string(t.lstrip(BOM)) 

2206 result = self.parse_string(t, parse_all=parseAll) 

2207 except ParseBaseException as pe: 

2208 fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else "" 

2209 out.append(pe.explain()) 

2210 out.append(f"FAIL: {fatal}{pe}") 

2211 if ParserElement.verbose_stacktrace: 

2212 out.extend(traceback.format_tb(pe.__traceback__)) 

2213 success = success and failureTests 

2214 result = pe 

2215 except Exception as exc: 

2216 tag = "FAIL-EXCEPTION" 

2217 

2218 # see if this exception was raised in a parse action 

2219 tb = exc.__traceback__ 

2220 it = iter(traceback.walk_tb(tb)) 

2221 for f, line in it: 

2222 if (f.f_code.co_filename, line) == pa_call_line_synth: 

2223 next_f = next(it)[0] 

2224 tag += f" (raised in parse action {next_f.f_code.co_name!r})" 

2225 break 

2226 

2227 out.append(f"{tag}: {type(exc).__name__}: {exc}") 

2228 if ParserElement.verbose_stacktrace: 

2229 out.extend(traceback.format_tb(exc.__traceback__)) 

2230 success = success and failureTests 

2231 result = exc 

2232 else: 

2233 success = success and not failureTests 

2234 if postParse is not None: 

2235 try: 

2236 pp_value = postParse(t, result) 

2237 if pp_value is not None: 

2238 if isinstance(pp_value, ParseResults): 

2239 out.append(pp_value.dump()) 

2240 else: 

2241 out.append(str(pp_value)) 

2242 else: 

2243 out.append(result.dump()) 

2244 except Exception as e: 

2245 out.append(result.dump(full=fullDump)) 

2246 out.append( 

2247 f"{postParse.__name__} failed: {type(e).__name__}: {e}" 

2248 ) 

2249 else: 

2250 out.append(result.dump(full=fullDump)) 

2251 out.append("") 

2252 

2253 if printResults: 

2254 print_("\n".join(out)) 

2255 

2256 allResults.append((t, result)) 

2257 

2258 return success, allResults 

2259 

2260 def create_diagram( 

2261 self, 

2262 output_html: Union[TextIO, Path, str], 

2263 vertical: int = 3, 

2264 show_results_names: bool = False, 

2265 show_groups: bool = False, 

2266 embed: bool = False, 

2267 **kwargs, 

2268 ) -> None: 

2269 """ 

2270 Create a railroad diagram for the parser. 

2271 

2272 Parameters: 

2273 

2274 - ``output_html`` (str or file-like object) - output target for generated 

2275 diagram HTML 

2276 - ``vertical`` (int) - threshold for formatting multiple alternatives vertically 

2277 instead of horizontally (default=3) 

2278 - ``show_results_names`` - bool flag whether diagram should show annotations for 

2279 defined results names 

2280 - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box 

2281 - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed 

2282 the resulting HTML in an enclosing HTML source 

2283 - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code; 

2284 can be used to insert custom CSS styling 

2285 - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the 

2286 generated code 

2287 

2288 Additional diagram-formatting keyword arguments can also be included; 

2289 see railroad.Diagram class. 

2290 """ 

2291 

2292 try: 

2293 from .diagram import to_railroad, railroad_to_html 

2294 except ImportError as ie: 

2295 raise Exception( 

2296 "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams" 

2297 ) from ie 

2298 

2299 self.streamline() 

2300 

2301 railroad = to_railroad( 

2302 self, 

2303 vertical=vertical, 

2304 show_results_names=show_results_names, 

2305 show_groups=show_groups, 

2306 diagram_kwargs=kwargs, 

2307 ) 

2308 if not isinstance(output_html, (str, Path)): 

2309 # we were passed a file-like object, just write to it 

2310 output_html.write(railroad_to_html(railroad, embed=embed, **kwargs)) 

2311 return 

2312 

2313 with open(output_html, "w", encoding="utf-8") as diag_file: 

2314 diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs)) 

2315 

    # Compatibility synonyms
    # Each pre-PEP8 camelCase name below is bound to its snake_case counterpart
    # through replaced_by_pep8 (imported from .util); the static-method entries
    # are re-wrapped in staticmethod.
    # NOTE(review): presumably replaced_by_pep8 returns a forwarding wrapper -
    # confirm against .util before relying on its exact behavior.
    # fmt: off
    inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
    setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
        "setDefaultWhitespaceChars", set_default_whitespace_chars
    ))
    disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
    enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
    enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
    resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

    setResultsName = replaced_by_pep8("setResultsName", set_results_name)
    setBreak = replaced_by_pep8("setBreak", set_break)
    setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
    addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
    addCondition = replaced_by_pep8("addCondition", add_condition)
    setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
    tryParse = replaced_by_pep8("tryParse", try_parse)
    parseString = replaced_by_pep8("parseString", parse_string)
    scanString = replaced_by_pep8("scanString", scan_string)
    transformString = replaced_by_pep8("transformString", transform_string)
    searchString = replaced_by_pep8("searchString", search_string)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
    parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
    setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
    setDebug = replaced_by_pep8("setDebug", set_debug)
    setName = replaced_by_pep8("setName", set_name)
    parseFile = replaced_by_pep8("parseFile", parse_file)
    runTests = replaced_by_pep8("runTests", run_tests)
    canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
    # plain alias: defaultName is the same property object as default_name
    defaultName = default_name
    # fmt: on

2350 

2351 

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.must_skip = must_skip
        self.anchor = expr

    def _generateDefaultName(self) -> str:
        # render "anchor + Empty()" and show the Empty as the pending '...'
        anchor_plus_empty = str(self.anchor + Empty())
        return anchor_plus_empty.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # the '...' becomes a SkipTo(other) whose matched text is collected under "_skipped"
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing was skipped: remove the empty token and the results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor was absent: replace the empty skipped entry with a marker
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never combined with a following expression cannot parse
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2391 

2392 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self):
        # tokens are always initialized with savelist=False
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        """Default name is the name of the concrete class."""
        return type(self).__name__

2403 

2404 

class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Always fail: raise :class:`ParseException` at ``loc``."""
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2418 

2419 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: select a subclass with optimized parseImpl.
        # Only direct Literal(...) construction is redirected; subclasses
        # always get an instance of themselves.
        target = cls
        if cls is Literal:
            text = matchString or match_string
            if not text:
                target = Empty
            elif len(text) == 1:
                target = _SingleCharLiteral

        return super().__new__(target)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        # self.name is derived from self.match, which must already be set
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        """Default name is the repr of the match string."""
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the literal text at ``loc``; return the new location and the matched
        string, or raise :class:`ParseException`.
        """
        # cheap first-character comparison before the full startswith check
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2471 

2472 

class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString=""):
        # both arguments are accepted for signature compatibility with Literal
        # but are not used: the match string is always ""
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match without consuming input: return ``loc`` unchanged and no tokens."""
        no_tokens = []
        return loc, no_tokens

2488 

2489 

class _SingleCharLiteral(Literal):
    # Literal variant selected by Literal.__new__ for one-character match strings
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # a single character comparison is all that is needed
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2495 

2496 

# Install Literal as ParserElement's _literalStringClass now that Literal is defined.
# NOTE(review): presumably this is the class used to wrap plain strings that are
# combined with expressions (see inline_literals_using) - confirm against its users.
ParserElement._literalStringClass = Literal

2498 

2499 

2500class Keyword(Token): 

2501 """ 

2502 Token to exactly match a specified string as a keyword, that is, 

2503 it must be immediately preceded and followed by whitespace or 

2504 non-keyword characters. Compare with :class:`Literal`: 

2505 

2506 - ``Literal("if")`` will match the leading ``'if'`` in 

2507 ``'ifAndOnlyIf'``. 

2508 - ``Keyword("if")`` will not; it will only match the leading 

2509 ``'if'`` in ``'if x=1'``, or ``'if(y==2)'`` 

2510 

2511 Accepts two optional constructor arguments in addition to the 

2512 keyword string: 

2513 

2514 - ``ident_chars`` is a string of characters that would be valid 

2515 identifier characters, defaulting to all alphanumerics + "_" and 

2516 "$" 

2517 - ``caseless`` allows case-insensitive matching, default is ``False``. 

2518 

2519 Example:: 

2520 

2521 Keyword("start").parse_string("start") # -> ['start'] 

2522 Keyword("start").parse_string("starting") # -> Exception 

2523 

2524 For case-insensitive matching, use :class:`CaselessKeyword`. 

2525 """ 

2526 

2527 DEFAULT_KEYWORD_CHARS = alphanums + "_$" 

2528 

2529 def __init__( 

2530 self, 

2531 match_string: str = "", 

2532 ident_chars: typing.Optional[str] = None, 

2533 caseless: bool = False, 

2534 *, 

2535 matchString: str = "", 

2536 identChars: typing.Optional[str] = None, 

2537 ): 

2538 super().__init__() 

2539 identChars = identChars or ident_chars 

2540 if identChars is None: 

2541 identChars = Keyword.DEFAULT_KEYWORD_CHARS 

2542 match_string = matchString or match_string 

2543 self.match = match_string 

2544 self.matchLen = len(match_string) 

2545 self.firstMatchChar = match_string[:1] 

2546 if not self.firstMatchChar: 

2547 raise ValueError("null string passed to Keyword; use Empty() instead") 

2548 self.errmsg = f"Expected {type(self).__name__} {self.name}" 

2549 self.mayReturnEmpty = False 

2550 self.mayIndexError = False 

2551 self.caseless = caseless 

2552 if caseless: 

2553 self.caselessmatch = match_string.upper() 

2554 identChars = identChars.upper() 

2555 self.identChars = set(identChars) 

2556 

2557 def _generateDefaultName(self) -> str: 

2558 return repr(self.match) 

2559 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """
    Try to match this keyword at ``loc`` in ``instring``.

    The keyword text must be present at ``loc`` and must not be immediately
    preceded or followed by a character found in ``self.identChars``.  When
    ``self.caseless`` is set, the candidate text and both neighbouring
    characters are upper-cased before they are compared.

    Returns ``(loc + self.matchLen, self.match)`` on success.

    Raises :class:`ParseException` on failure.  When the keyword text is
    present but touches a keyword character, the message says which side
    was at fault and the reported location points at that neighbouring
    character; otherwise the plain ``self.errmsg`` is reported at ``loc``.
    """
    errmsg = self.errmsg or ""
    errloc = loc
    if self.caseless:
        if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
            if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen].upper() not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                # (message kept identical to the case-sensitive branch below)
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

    elif (
        # cheap single-character comparison first; startswith() handles
        # keywords longer than one character
        instring[loc] == self.firstMatchChar
        and self.matchLen == 1
        or instring.startswith(self.match, loc)
    ):
        if loc == 0 or instring[loc - 1] not in self.identChars:
            if (
                loc >= len(instring) - self.matchLen
                or instring[loc + self.matchLen] not in self.identChars
            ):
                return loc + self.matchLen, self.match

            # followed by keyword char
            errmsg += ", keyword was immediately followed by keyword character"
            errloc = loc + self.matchLen
        else:
            # preceded by keyword char
            errmsg += ", keyword was immediately preceded by keyword character"
            errloc = loc - 1
        # else no match just raise plain exception

    raise ParseException(instring, errloc, errmsg, self)

2603 

@staticmethod
def set_default_keyword_chars(chars) -> None:
    """
    Replace the default set of keyword characters used by
    :class:`Keyword` expressions.

    The given ``chars`` value is stored on the class as
    ``Keyword.DEFAULT_KEYWORD_CHARS``.
    """
    Keyword.DEFAULT_KEYWORD_CHARS = chars

# Compatibility synonyms
# pre-PEP8 camelCase name, wrapped by replaced_by_pep8 around the new method
setDefaultKeywordChars = staticmethod(
    replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
)

2615 

2616 

class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.

    Note: the token returned on a match is always spelled exactly like the
    string the expression was defined with, NOT like the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        # the pre-PEP8 keyword wins when both spellings are supplied
        literal = matchString if matchString else match_string
        # the parent stores the upper-cased text as the comparison target
        super().__init__(literal.upper())
        # Preserve the defining literal.
        self.returnString = literal
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the upper-cased input slice against the stored match text."""
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2642 

2643 

class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # pre-PEP8 keyword spellings take precedence over the snake_case
        # arguments; everything is delegated to Keyword with caseless=True
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2667 

2668 

class CloseMatch(Token):
    """A variation on :class:`Literal` which accepts "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # an explicit positional/snake_case value overrides the camelCase default
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        """Build the default name as ``<ClassName>:<repr of match_string>``."""
        cls_name = type(self).__name__
        return f"{cls_name}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the input at ``loc`` with ``self.match_string`` character by
        character, tolerating up to ``self.maxMismatches`` differences.

        Returns ``(end_loc, results)`` where ``results`` holds the matched
        input text plus the named results ``original`` and ``mismatches``.
        Raises :class:`ParseException` at the starting location when the
        remaining input is too short or too many characters differ.
        """
        start = loc
        target = self.match_string
        end = start + len(target)

        # not enough input left to hold the whole match string
        if end > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        allowed = self.maxMismatches
        mismatches = []
        last_index = 0

        for last_index, (src, mat) in enumerate(zip(instring[start:end], target)):
            if self.caseless:
                src, mat = src.lower(), mat.lower()

            if src == mat:
                continue

            mismatches.append(last_index)
            if len(mismatches) > allowed:
                # tolerance exceeded - report failure at the start position
                raise ParseException(instring, loc, self.errmsg, self)

        # every character was compared without exceeding the tolerance
        loc = start + last_index + 1
        results = ParseResults([instring[start:loc]])
        results["original"] = target
        results["mismatches"] = mismatches
        return loc, results

2754 

2755 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # pre-PEP8 keyword spellings take precedence over the snake_case ones
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        # --- character sets -------------------------------------------
        leading = set(initChars)
        if excludeChars:
            excluded = set(excludeChars)
            leading -= excluded
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excluded)
        self.initChars = leading
        self.initCharsOrig = "".join(sorted(leading))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set: body characters share the leading set
            self.bodyChars = leading
            self.bodyCharsOrig = self.initCharsOrig

        # --- length limits --------------------------------------------
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds (locals are reused below
            # when the equivalent regex is built)
            min = max = exact
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                lead_fragment = re.escape(self.initCharsOrig)
            else:
                lead_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # one character class covers the whole word
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                elif self.minLen != self.maxLen:
                    upper = "" if self.maxLen == _MAX_INT else self.maxLen
                    repeat = f"{{{self.minLen},{upper}}}"
                else:
                    repeat = f"{{{self.minLen}}}"
                self.reString = f"{lead_fragment}{repeat}"
            else:
                # leading class followed by a repeated body class; the body
                # quantifier counts one less because the leading char is
                # matched separately
                if max == 1:
                    body_fragment = ""
                    repeat = ""
                else:
                    body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        lower = min - 1 if min > 0 else ""
                        if min != max:
                            upper = max - 1 if max > 0 else ""
                            repeat = f"{{{lower},{upper}}}"
                        else:
                            repeat = f"{{{lower}}}"

                self.reString = f"{lead_fragment}{body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def _generateDefaultName(self) -> str:
        """Build a compact ``W:(...)`` name with an optional length suffix."""

        def charsAsStr(s):
            # collapse to range notation and truncate long descriptions
            limit = 16
            collapsed = _collapse_string_to_ranges(s, re_escape=False)
            if len(collapsed) <= limit:
                return collapsed
            return collapsed[: limit - 3] + "..."

        if self.initChars == self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"

        # add length specification
        if self.minLen <= 1 and self.maxLen == _MAX_INT:
            return base

        if self.minLen == self.maxLen:
            if self.minLen == 1:
                # single-character word: drop the "W:" prefix
                return base[2:]
            return base + f"{{{self.minLen}}}"

        if self.maxLen == _MAX_INT:
            return base + f"{{{self.minLen},...}}"

        return base + f"{{{self.minLen},{self.maxLen}}}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Character-by-character matcher, used when no regex was installed.

        Returns ``(end_loc, matched_text)`` or raises
        :class:`ParseException`.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        stop = min(start + self.maxLen, instrlen)

        loc = start + 1
        while loc < stop and instring[loc] in body_chars:
            loc += 1

        too_short = loc - start < self.minLen
        followed_by_body = loc < instrlen and instring[loc] in body_chars
        preceded_by_body = start > 0 and instring[start - 1] in body_chars

        if (
            too_short
            # an explicit max was given but the word keeps going
            or (self.maxSpecified and followed_by_body)
            # keyword mode: must not touch other body characters
            or (self.asKeyword and (preceded_by_body or followed_by_body))
        ):
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed by ``__init__`` when compilation succeeds."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.group()

3005 

3006 

class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    for matching exactly one character drawn from a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # pre-PEP8 keyword spellings take precedence; delegate to Word with
        # a fixed length of one character
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )

3027 

3028 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first read
            self._re = None
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # looks like an already-compiled RE object
            self._re = pattern
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the result style; as_match takes precedence when both are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    @cached_property
    def re(self) -> re.Pattern:
        """Compiled pattern object, created on first access."""
        if self._re:
            return self._re

        if callable(self.pattern):
            # replace self.pattern with the value returned by calling self.pattern()
            produced = cast(Callable[[], str], self.pattern)()
            self.pattern = produced

            # see if we got a compiled RE back instead of a str - if so, we're done
            if hasattr(produced, "pattern") and hasattr(produced, "match"):
                self._re = cast(re.Pattern[str], produced)
                self.pattern = self.reString = self._re.pattern
                return self._re

        try:
            compiled = re.compile(self.pattern, self.flags)
        except re.error:
            raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")

        self._re = compiled
        return compiled

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self) -> bool:  # type: ignore[override]
        """True when the pattern matches the empty string."""
        return self.re_match("", 0) is not None

    def _generateDefaultName(self) -> str:
        """Build the default name ``Re:(<pattern repr>)`` with doubled backslashes collapsed."""
        shown = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc``; return the matched text with named groups as named results."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        ret = ParseResults(found.group())
        for group_name, group_value in found.groupdict().items():
            ret[group_name] = group_value

        return found.end(), ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Match at ``loc``; return the tuple of the match's groups."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.groups()

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Match at ``loc``; return the match object itself."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch:
            if callable(repl):
                raise TypeError(
                    "cannot use sub() with a callable with Regex(as_match=True)"
                )

            # tokens[0] is the match object, so expand the template on it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # tokens[0] is the matched text, so re-run the substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3201 

3202 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # escaped whitespace spellings and the characters they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """
        Build the matching regex and, when unquoting is enabled, the scanner
        regex used to decode escapes in the quoted body.

        Raises ``ValueError`` if ``quote_char`` or an explicitly given
        ``end_quote_char`` is empty after stripping whitespace, or if the
        generated pattern fails to compile.
        """
        super().__init__()
        # merge pre-PEP8 keyword spellings with the snake_case arguments;
        # the boolean flags default to True, so either spelling can disable them
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of a multi-character end quote as long
            # as it is not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally exclude line-break characters
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # NOTE: this fragment must NOT be an f-string - inside an
                    # f-string "{3}", "{2}" and "{4}" are replacement fields
                    # that render as the literal digits 3, 2 and 4 instead of
                    # acting as regex repetition counts
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        """Describe the expression by its quote delimiters."""
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    @staticmethod
    def _convert_escaped_numerics(s: str) -> str:
        """
        Decode the text following the backslash of a numeric escape.

        ``"0"`` becomes NUL, three digits are read as an octal code point,
        and text starting with ``u`` or ``x`` is read as a hexadecimal code
        point; anything else is returned unchanged.
        """
        if s == "0":
            return "\0"
        if s.isdigit() and len(s) == 3:
            return chr(int(s, base=8))
        if s.startswith(("u", "x")):
            return chr(int(s[1:], base=16))
        return s

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match a quoted string at ``loc``.

        Returns ``(end_loc, text)``; ``text`` has the delimiters removed and
        escapes decoded when ``unquote_results`` is set, otherwise it is the
        raw matched text including the quotes.  Raises
        :class:`ParseException` if no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = None
        if instring[loc] == self.first_quote_char:
            result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                pieces = []
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ws_map = self.ws_map
                    decode_numeric = self._convert_escaped_numerics
                    for match in self.unquote_scan_re.finditer(ret):
                        ws, numeric, escaped, plain = match.groups()
                        if ws:
                            # group 1 matches \t, \n, etc.
                            pieces.append(ws_map[ws])
                        elif numeric:
                            # group 2 matches escaped octal, null, hex, and
                            # Unicode sequences; drop the leading backslash
                            pieces.append(decode_numeric(numeric[1:]))
                        elif escaped:
                            # group 3 matches escaped characters
                            pieces.append(escaped[-1])
                        else:
                            # group 4 matches any character
                            pieces.append(plain)
                else:
                    for match in self.unquote_scan_re.finditer(ret):
                        escaped, plain = match.groups()
                        # group 1 matches escaped characters,
                        # group 2 matches any character
                        pieces.append(escaped[-1] if escaped else plain)
                ret = "".join(pieces)

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3437 

3438 

class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        # whitespace is treated as content, not skipped before matching
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Build the default ``!W:(...)`` name, truncating long character lists."""
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Consume characters from ``loc`` until a disallowed character, the
        maximum length, or the end of the input is reached.

        Returns ``(end_loc, matched_text)``; raises :class:`ParseException`
        when the first character is disallowed or fewer than ``minLen``
        characters were consumed.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc = start + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3517 

3518 

class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # Display labels for known whitespace characters, used to build the
    # default expression name.  The keys also define which characters are
    # treated as skippable whitespace when they are not being matched.
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        """Set the whitespace characters to match and the length limits.

        ``max`` and ``exact`` values of 0 mean "no limit"; a positive
        ``exact`` overrides both ``min`` and ``max``.
        """
        super().__init__()
        self.matchWhite = ws
        # every known whitespace character that is NOT being matched stays
        # skippable, so it is still ignored ahead of this expression
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        self.mayReturnEmpty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Return the concatenated labels of the characters being matched.

        Characters without an entry in ``whiteStrs`` (for example ``"\\v"``)
        are shown as ``<U+XXXX>`` instead of raising ``KeyError``.
        """
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a run of the configured whitespace characters.

        Returns ``(new_loc, matched_text)``.  Raises ``ParseException`` when
        the character at ``loc`` is not one of the matched characters, or
        when fewer than ``minLen`` characters could be consumed.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        # never scan past maxLen characters or the end of the input
        maxloc = min(start + self.maxLen, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3594 

3595 

class PositionToken(Token):
    """Common base for the position-testing tokens defined below it
    (columns, line/string/word boundaries).  Instances are flagged as able
    to return an empty match and as not raising ``IndexError``.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

3601 

3602 

class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and whitespace until the target column
        is reached, a non-space character is hit, or the input ends.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume the text between ``loc`` and the target column.

        Raises ``ParseException`` if the current column is already past the
        target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        stop = loc + self.col - current
        return stop, instring[loc:stop]

3635 

3636 

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used only to skip the remaining whitespace chars
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Advance past skippable whitespace, and past newlines as well when
        "\n" was part of the original whitespace set.
        """
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)
        if "\n" not in self.orig_whiteChars:
            return pos

        # slicing (rather than indexing) keeps this safe at end of input
        while instring[pos : pos + 1] == "\n":
            pos = skip(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3684 

3685 

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # "\n" must not be skipped as whitespace, since it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc`` (returning ``"\\n"``) or the exact end
        of the input (returning no tokens); anything else is a failure.
        """
        text_len = len(instring)
        if loc == text_len:
            return loc + 1, []
        if loc < text_len and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3707 

3708 

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or at the position reached
        by pre-parsing from position 0.
        """
        # see if entire string up to here is just whitespace and ignoreables
        at_start = loc == 0 or loc == self.preParse(instring, 0)
        if not at_start:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3724 

3725 

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or beyond the end of
        the input.

        Exactly at the end, the returned location is advanced by one; past
        the end, it is returned unchanged.  Raises ``ParseException`` when
        input remains at ``loc``.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        # loc >= end here, so no further failure branch is reachable
        if loc == end:
            return loc + 1, []
        return loc, []

3744 

3745 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    # NOTE: the docstring above is a raw string so that ``\b`` is kept as a
    # literal backslash-b instead of being stored as a backspace character.

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        """``wordChars`` is the legacy keyword synonym for ``word_chars``;
        it takes precedence when it differs from the default.
        """
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or where the previous
        character is not a word character and the current one is.
        """
        if loc != 0:
            preceded_by_word_char = instring[loc - 1] in self.wordChars
            if preceded_by_word_char or instring[loc] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3770 

3771 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    # NOTE: the docstring above is a raw string so that ``\b`` is kept as a
    # literal backslash-b instead of being stored as a backspace character.

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        """``wordChars`` is the legacy keyword synonym for ``word_chars``;
        it takes precedence when it differs from the default.
        """
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or past the end of the input, or where
        the previous character is a word character and the current one is
        not.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                # at position 0 there is no preceding character; without
                # this check instring[loc - 1] would wrap around to the
                # LAST character of the input and could match spuriously
                loc == 0
                or instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3797 

3798 

class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True
    """

    def __init__(self, tag_name: str, value: Any = True):
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.leave_whitespace()
        # the tag itself is attached by a parse action on this element
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

3839 

3840 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Normalize ``exprs`` into the ``self.exprs`` list.

        Accepts a single string, a single ParserElement, a generator, or
        any other iterable; strings are wrapped with the literal string
        class.  A non-iterable value is stored as a one-element list.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # if sequence of strings provided, wrap with Literal
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and reset the cached
        default name; returns ``self``.
        """
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        # work on copies so the original sub-expressions are left untouched
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        # work on copies so the original sub-expressions are left untouched
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained
        expression.  A ``Suppress`` that is already registered is skipped.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for e in self.exprs:
            # propagate the entry the base class just recorded
            e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this expression and its children, flattening a nested
        expression of the same class when it is safe to do so.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def can_absorb(candidate) -> bool:
            # only merge same-class children that carry no parse actions,
            # results name or debug flag of their own
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if can_absorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if can_absorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning``, validates every
        contained expression and then checks this one for recursion.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = ([] if validateTrace is None else validateTrace)[:] + [self]
        for e in self.exprs:
            e.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are copies as well."""
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if a contained expression already has a
        results name of its own.
        """
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
            for e in self.exprs:
                if (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and flag not in e.suppress_warnings_
                ):
                    warning = (
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {e.resultsName!r} on contained expression"
                    )
                    warnings.warn(warning, stacklevel=3)
                    # one warning per call is enough
                    break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4010 

4011 

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once seen during parsing, later failures in the
        enclosing ``And`` are raised as ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence, replacing each ``...`` placeholder with a
        ``SkipTo`` targeting the expression that follows it.

        Raises ``Exception`` if the sequence ends with ``...``.
        """
        exprs: list[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            last_index = len(exprs) - 1
            expanded: list[ParserElement] = []
            for i, expr in enumerate(exprs):
                if expr is not Ellipsis:
                    expanded.append(expr)
                elif i == last_index:
                    raise Exception("cannot construct And with sequence ending in ...")
                else:
                    # adding to Empty() converts a plain string into an
                    # expression; the converted element is the last one
                    skipto_arg: ParserElement = typing.cast(
                        ParseExpression, (Empty() + exprs[i + 1])
                    ).exprs[-1]
                    expanded.append(SkipTo(skipto_arg)("_skipped*"))
            exprs[:] = expanded
        super().__init__(exprs, savelist)

        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take the whitespace handling from the leading expression
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips with their following expression, streamline
        the children, and link any ``IndentedBlock`` to the expression that
        precedes it.
        """

        def ends_in_pending_skip(expr) -> bool:
            return (
                isinstance(expr, ParseExpression)
                and bool(expr.exprs)
                and isinstance(expr.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_in_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            # iterate over a snapshot; self.exprs is modified inside the loop
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if ends_in_pending_skip(e):
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            node = cur
            visited = set()
            while id(node) not in visited:
                visited.add(id(node))
                if isinstance(node, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=node: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                node = next(iter(node.recurse()), None)
                if node is None:
                    break

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating tokens.

        After an ``_ErrorStop`` marker has been passed, a failure of a later
        expression is raised as ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        past_error_stop = False
        for e in self.exprs[1:]:
            # exact type check: subclasses of _ErrorStop are not markers
            if type(e) is And._ErrorStop:
                past_error_stop = True
                continue

            if not past_error_stop:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            else:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support ``expr += other`` by appending to this ``And`` in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        chain = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(chain)
            # later expressions are only reachable without consuming input
            # if every earlier one may match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(map(str, self.exprs))
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "{" + inner + "}"

4172 

4173 

class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline the children, then recompute the flags derived from
        them.
        """
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        Raises the fatal exception with the greatest location if any
        alternative failed fatally and none matched; otherwise the
        ``ParseException`` that got furthest into the input.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: find out how far each alternative would match
        for e in self.exprs:
            try:
                end_loc = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error supersedes any ordinary failure seen so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, e))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = candidates[0][1]
                return best_expr._parse(instring, loc, do_actions)

            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for expected_loc, candidate in candidates:
                if expected_loc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_loc, toks = candidate._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_loc >= expected_loc:
                        return actual_loc, toks
                    # didn't match as much as before
                    if actual_loc > longest[0]:
                        longest = actual_loc, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer expression text
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if furthest_exc is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            if furthest_loc == loc:
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Support ``expr ^= other`` by appending to this ``Or`` in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " ^ ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4327 

4328 

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline the children, then recompute the flags derived from
        them.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that matches.

        A fatal exception from any alternative is re-raised immediately.
        If nothing matches, raises the ``ParseException`` that got furthest
        into the input.
        """
        furthest_loc = -1
        furthest_exc = None

        for e in self.exprs:
            try:
                return e._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                text_len = len(instring)
                if text_len > furthest_loc:
                    furthest_exc = ParseException(instring, text_len, e.errmsg, self)
                    furthest_loc = text_len

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Support ``expr |= other`` by appending to this ``MatchFirst`` in
        place.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " | ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4434 

4435 

class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        # an Each with no sub-expressions may return empty; otherwise it may
        # only do so when every sub-expression may
        self.mayReturnEmpty = not self.exprs or all(
            e.mayReturnEmpty for e in self.exprs
        )
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built on first parse
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Support ``self &= other`` by appending *other* to this expression.

        Strings are converted with ``self._literalStringClass``; operands
        that are not a :class:`ParserElement` yield ``NotImplemented``.
        """
        candidate = (
            self._literalStringClass(other) if isinstance(other, str_type) else other
        )
        if isinstance(candidate, ParserElement):
            # extend in place instead of building Each([self, other])
            return self.append(candidate)
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline via the parent class, then recompute ``mayReturnEmpty``."""
        super().streamline()
        self.mayReturnEmpty = not self.exprs or all(
            e.mayReturnEmpty for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match every required sub-expression, in whatever order they occur.

        A first phase repeatedly trial-parses the remaining expressions with
        ``try_parse`` to discover the order in which they match; a second
        phase re-parses them in that order with ``_parse`` to collect the
        results.

        Raises the furthest-advanced ``ParseFatalException`` recorded in the
        final trial pass, or a ``ParseException`` naming the required
        expressions that never matched.
        """
        if self.initExprGroups:
            # one-time classification of the sub-expressions, cached on self
            opt_wrappers = [e for e in self.exprs if isinstance(e, Opt)]
            # map id(wrapped expr) -> its Opt wrapper, so a match on the bare
            # expression can be recorded as a match of the Opt
            self.opt1map = {id(o.expr): o for o in opt_wrappers}
            opt1 = [o.expr for o in opt_wrappers]
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        match_order: list[ParserElement] = []
        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []

        # keep making passes until a full pass matches nothing at all
        while True:
            candidates = pending_required + pending_optional + repeatables
            failed.clear()
            fatals.clear()
            for expr in candidates:
                try:
                    scan_loc = expr.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = expr
                    fatals.append(pfe)
                    failed.append(expr)
                except ParseException:
                    failed.append(expr)
                else:
                    match_order.append(self.opt1map.get(id(expr), expr))
                    if expr in pending_required:
                        pending_required.remove(expr)
                    elif expr in pending_optional:
                        pending_optional.remove(expr)
            if len(failed) == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda exc: -exc.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer element description
                    fatals.sort(
                        key=lambda exc: (-exc.loc, -len(str(exc.parser_element)))
                    )
            raise fatals[0]

        if pending_required:
            missing = ", ".join([str(e) for e in pending_required])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        total_results = ParseResults([])
        for matched in match_order:
            loc, results = matched._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        """Return the default name: the sub-expressions joined by ``' & '`` in braces."""
        members = " & ".join(str(e) for e in self.exprs)
        return "{" + members + "}"

4608 

4609 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        super().__init__(savelist)
        if isinstance(expr, str_type):
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                # this element is itself an instance of the literal class, so
                # wrap the text in a plain Literal instead
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return
        # mirror the wrapped expression's parsing attributes on this wrapper
        self.mayIndexError = expr.mayIndexError
        self.mayReturnEmpty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression as a one-item list, or ``[]`` if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse with the wrapped expression, filling in details on failure.

        ``ParseSyntaxException`` propagates untouched. Any other
        ``ParseBaseException`` has its missing ``pstr``/``loc``/
        ``parser_element`` filled in, and its message replaced by this
        element's ``errmsg`` when this element is a named non-``Forward``.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``leave_whitespace`` here and, if *recursive*, to a copy of the wrapped expression."""
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy so the original wrapped expression is unchanged
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``ignore_whitespace`` here and, if *recursive*, to a copy of the wrapped expression."""
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy so the original wrapped expression is unchanged
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register *other* as ignorable on this element and on the wrapped expression.

        A ``Suppress`` that is already present in ``self.ignoreExprs`` is
        not registered a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            # pass along the entry that the parent class just appended
            self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        """Streamline this element, then the wrapped expression if there is one."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element is already on the path."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        path = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(path)

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, then validates the wrapped expression."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        nested_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(nested_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<ClassName>:(<wrapped expression>)"``."""
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4718 

4719 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Empty match that succeeds only at exactly column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that succeeds only at a column greater than ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; used to detect the trailing undent
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse a run of ``self.expr`` matches that all start in the same column.

        The column is taken from the first non-whitespace position at or
        after *loc*. When ``recursive`` is set, each match may be followed
        by a nested, more deeply indented block.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        line_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Opt(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )

4782 

4783 

class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the wrapped expression only when *loc* is 0; otherwise fail."""
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

4803 

4804 

class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the wrapped expression only when *loc* is in column 1; otherwise fail."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

4836 

4837 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the lookahead at *loc* and return *loc* unchanged with emptied results."""
        # parse with the wrapped expression, then delete the list contents of
        # the returned ParseResults; any named results defined in the
        # lookahead expression are kept
        results = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del results[:]

        return loc, results

4872 

4873 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0):
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # ``exact`` means the lookbehind distance is taken from the
        # expression itself rather than from the caller's ``retreat``
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat, self.exact = len(expr), True
        elif isinstance(expr, (Literal, Keyword)):
            retreat, self.exact = expr.matchLen, True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat, self.exact = expr.maxLen, True
        elif isinstance(expr, PositionToken):
            retreat, self.exact = 0, True
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the token list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Verify that the wrapped expression matches just before *loc*.

        Returns *loc* unchanged together with the lookbehind's results.
        Raises a ``ParseException``-family error when no lookbehind
        position matches.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            lookbehind_start = loc - self.retreat
            _, ret = self.expr._parse(instring, lookbehind_start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_exc: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                last_exc = pbe
            else:
                break
        else:
            # no offset produced a match: re-raise the most recent failure
            raise last_exc

        return loc, ret

4953 

4954 

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression and return ``[start, tokens, end]`` with names attached."""
        begin = loc
        end, tokens = self.expr._parse(instring, begin, do_actions, callPreParse=False)
        located = ParseResults([begin, tokens, end])
        located["locn_start"] = begin
        located["value"] = tokens
        located["locn_end"] = end
        # must return as a list when named, so that the name will be attached
        # to the complete group
        return end, ([located] if self.resultsName else located)

4995 

4996 

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # set the flag directly; do NOT call self.leave_whitespace(), which
        # would propagate the setting to the contained expressions
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Fail if the wrapped expression can parse at *loc*; otherwise return ``(loc, [])``."""
        unwanted_matches = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if unwanted_matches:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        """Return the default name: ``~`` followed by the wrapped expression in braces."""
        return "~{" + str(self.expr) + "}"

5038 

5039 

class _MultipleMatch(ParseElementEnhance):
    """Shared base for repetition expressions (see ``OneOrMore``/``ZeroOrMore``).

    Matches the wrapped expression one or more times, optionally stopping
    before a sentinel expression given as ``stop_on`` (or the legacy
    keyword ``stopOn``).
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        # the legacy keyword wins when both spellings are supplied
        sentinel = stopOn or stop_on
        self.saveAsList = True
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that terminates repetition."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated, so a successful try_parse means "not at the sentinel"
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the wrapped expression once, then as many more times as possible.

        The first match is mandatory and its failure propagates. Failures
        (``ParseException`` or ``IndexError``) on later repetitions simply
        end the loop.
        """
        # bind to locals once; these are called repeatedly in the loop below
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        check_ender = self.not_ender is not None
        if check_ender:
            try_not_ender = self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if check_ender:
            try_not_ender(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if check_ender:
                    try_not_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Assign a results name, warning first about a named expression inside the repetition.

        The warning is emitted at most once, for the first contained
        expression that has a results name and does not suppress the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic.
        """
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            offender = next(
                (
                    e
                    for e in [self.expr] + self.expr.recurse()
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and diag not in e.suppress_warnings_
                ),
                None,
            )
            if offender is not None:
                message = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {offender.resultsName!r} on contained expression"
                )
                warnings.warn(message, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

5114 

5115 

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        """Return the default name: the wrapped expression in braces followed by ``...``."""
        return "{" + str(self.expr) + "}..."

5146 

5147 

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse as the base class does, but treat "no match at all" as an empty success."""
        try:
            return super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            no_matches = ParseResults([], name=self.resultsName)
            return loc, no_matches

    def _generateDefaultName(self) -> str:
        """Return the default name: the wrapped expression in brackets followed by ``...``."""
        return "[" + str(self.expr) + "]..."

5180 

5181 

class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Raises ``ValueError`` if ``min`` is less than 1, or if ``max`` is
        less than ``min``.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        # delimiters are kept in the output only when combining into one token
        self.delim = delim if combine else Suppress(delim)
        self.combine = combine
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one leading item, then (delimiter + item) repeated within the bounds
        extra_min = self.min - 1
        extra_max = None if self.max is None else self.max - 1
        list_expr = self.content + (self.delim + self.content) * (
            extra_min,
            extra_max,
        )
        if self.allow_trailing_delim:
            list_expr += Opt(self.delim)

        if self.combine:
            list_expr = Combine(list_expr)

        super().__init__(list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Return the default name in the form ``item [delim item]...``."""
        item = self.content.streamline()
        return f"{item} [{self.raw_delim} {item}]..."

5245 

5246 

class _NullToken:
    """Placeholder object that is falsy and whose string form is empty."""

    def __bool__(self):
        """Always evaluate as false."""
        return False

    def __str__(self):
        """Always render as the empty string."""
        return ""

5253 

5254 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may be matched at most once
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try the wrapped expression; on failure return the default (if any) at *loc*.

        A ``ParseException`` or ``IndexError`` from the wrapped expression
        is treated as "not present". When a default was supplied it becomes
        the sole token, and is also stored under the wrapped expression's
        results name if it has one.
        """
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Return the default name: the wrapped expression's string in square brackets."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "[" + inner + "]"


Optional = Opt

5332 

5333 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``other`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; scanning for the target stops at the first
      position where ``fail_on`` matches
    - ``failOn`` - keyword-only synonym for ``fail_on``; when truthy it takes
      precedence over ``fail_on``

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        # the legacy camelCase keyword wins when both spellings are supplied
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            # plain strings are promoted to the configured literal class
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # private helper element that only carries the combined ignore expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild ``self.ignorer`` from the target's ignore expressions plus ``self.ignoreExpr``."""
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """
        Register ``expr`` as ignorable via the parent class, then refresh the
        internal ignorer so the scan loop in :meth:`parseImpl` sees it.

        Returns ``self`` so that the call can be chained, e.g.
        ``SkipTo(x).ignore(comment) + y``.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """
        Scan forward from ``loc`` one character at a time until the target
        expression matches.

        Returns ``(new_loc, results)`` where ``results`` holds the skipped text
        and, when ``include`` was requested, the target's own tokens.

        Raises :class:`ParseException` when the end of ``instring`` is reached
        without the target matching.
        """
        startloc = loc
        instrlen = len(instring)
        # bind the callables used in the scan loop to locals
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # (leaving via ``break`` bypasses the ``else`` clause of this loop,
                # so control continues with the result-building code below)
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: parse actions are not run while searching
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real so its tokens (and actions) are included
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5484 

5485 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # remember where this Forward was created, for the warning issued in __del__
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' call, compared against in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """
        Attach ``other`` as the contained expression and copy its parsing
        attributes onto this ``Forward``.

        Strings are promoted with the configured literal class; any other
        non-``ParserElement`` operand yields ``NotImplemented``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Drop the creation frame only once the operand has been accepted.
        # If it were dropped before validation, a rejected operand would leave
        # an empty Forward whose __del__ fails reading ``caller_frame``.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """In-place form of ``<<``; delegates to :meth:`__lshift__` for ``ParserElement`` operands."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """
        Build the alternation via the parent class, warning first when this
        ``|`` originates from the same stack entry as the preceding ``<<``.
        """
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # attribute the warning to the line that created this Forward
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Parse using the contained expression.

        Warns when no expression has been attached yet. With left recursion
        disabled this simply defers to the parent implementation; otherwise the
        bounded-recursion search described in the comments below is used.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match – do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Disable whitespace skipping on this element only; ``recursive`` is accepted but not used."""
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Enable whitespace skipping on this element only; ``recursive`` is accepted but not used."""
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the contained expression once; the flag is set first so a self-reference stops here."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated recursion check; emits a ``DeprecationWarning`` on every call."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """
        Return ``"<ClassName>: <contained expression>"``, with the expression
        text truncated to 1000 characters.

        If rendering the contained expression raises an ``Exception``, the
        placeholder ``"..."`` is used instead.
        """
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                retString = str(self.expr)[:1000]
            else:
                retString = "None"
        except Exception:
            # Fall back to a placeholder for ordinary failures only; unlike a
            # ``return`` inside ``finally`` this lets KeyboardInterrupt and
            # SystemExit propagate.
            retString = "..."
        return f"{type(self).__name__}: {retString}"

    def copy(self) -> ParserElement:
        """
        Copy this element. An empty ``Forward`` cannot be copied by value, so a
        new ``Forward`` that refers back to this one is returned instead.
        """
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name via the parent class, warning first if no expression is attached yet."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5748 

5749 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.

    Subclasses wrap a single expression; this base initializer always leaves
    ``saveAsList`` set to ``False``.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted for signature compatibility but is not
        # forwarded to the parent initializer.
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

5758 

5759 

class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the camelCase keyword, when given, overrides the positional spelling
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register an ignorable expression and return ``self``.

        When ``adjacent`` is set, :meth:`ParserElement.ignore` is invoked
        directly on this element; otherwise the call goes through the normal
        parent-class chain.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Collapse ``tokenlist`` into a results object holding one joined string."""
        # start from a copy so the other state of the results object is carried over,
        # then swap the list contents for the single combined token
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        return [combined] if self.resultsName and combined.haskeys() else combined

5815 

5816 

class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens as one nested element of the result."""
        if not self._asPythonList:
            # default: nest the results object itself
            return [tokenlist]

        # ``aslist`` mode: convert to plain Python lists first
        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.asList()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)

5852 

5853 

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as an item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Name each entry of ``tokenlist`` after its first token.

        Entries of length zero are skipped. An integer key is converted to its
        string form. The value stored under the key is an empty string for a
        one-token entry, the second token for a simple two-token entry, and
        otherwise the remainder of the entry (unwrapped when it is a single
        token without keys of its own).
        """
        for offset, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if size == 1:
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                # everything after the key token forms the value
                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, offset)

        converted = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [converted] if self.resultsName else converted

5939 

5940 

class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` (a literal Ellipsis) becomes a pending skip that is
        # resolved once the following operand is known (see __add__/__sub__)
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> ParserElement:
        """``self + other``; a pending skip becomes a suppressed ``SkipTo(other)``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        """``self - other``; a pending skip becomes a suppressed ``SkipTo(other)``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressed: return ``self`` unchanged."""
        return self

5994 

5995 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator will write
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``.
    When the parse action completes, the decorator will write
    ``"<<leaving"`` followed by the returned value, or by the exception that
    the parse action raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # a leading extra argument is reported by its class name
            label = f"{type(paArgs[0]).__name__}.{label}"
        emit = sys.stderr.write
        emit(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            result = f(*paArgs)
        except Exception as exc:
            emit(f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n")
            raise
        else:
            emit(f"<<leaving {label} (ret: {result!r})\n")
            return result

    z.__name__ = f.__name__
    return z

6041 

6042 

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Grammar for the bracketed character-set strings accepted by srange().

# a backslash followed by one punctuation character; yields that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# ``\x##`` / ``\0x##`` (``x`` in either case); yields the character with that
# hex code. The digits are taken as everything after the ``x`` marker. Using
# ``lstrip(r"\0x")`` here would strip a *set* of characters: it leaves an
# uppercase ``X`` in place and reduces an all-zero value such as ``\x00`` to
# an empty string, and both make ``int(..., 16)`` raise.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# ``\0`` followed by octal digits; yields the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one set member: an escape, or any single character other than ``\`` and ``]``
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range, grouped as a (first, last) pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6069 

6070 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be parsed or expanded, an empty string is returned.
    """

    def _expand(part):
        # grouped parts are (first, last) range endpoints; anything else is
        # already a single character
        if isinstance(part, ParseResults):
            first, last = ord(part[0]), ord(part[1])
            return "".join(chr(code) for code in range(first, last + 1))
        return part

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(_expand(part) for part in body)
    except Exception:
        # any failure deliberately yields an empty character set
        return ""

6106 

6107 

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (or after its class,
    when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # resolve the display name up front: the callable's own name when it has
    # one, otherwise the name of its class
    class_name = func.__class__.__name__
    try:
        label = func.__name__
    except AttributeError:
        label = class_name

    def pa(s, l, t):
        converted = []
        for token in t:
            converted.append(func(token, *args))
        return converted

    pa.__name__ = label
    return pa

6152 

6153 

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every :class:`ParserElement` found among the caller's local variables that
    has no custom name yet is named after the variable that holds it.
    """
    # frame 1 is the function (or module body) that called us
    caller = sys._getframe(1)
    if caller is not None:
        caller = typing.cast(types.FrameType, caller)
        for var_name, candidate in caller.f_locals.items():
            if not isinstance(candidate, ParserElement) or candidate.customName:
                continue
            candidate.set_name(var_name)

6166 

6167 

# Quoted-string expressions. In each case the regex matches the opening quote
# and the body, and the closing quote is matched as a separate literal.
# A body may hold ordinary characters (no line breaks or bare backslashes),
# a doubled quote character, or a backslash escape.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either of the two forms above, tried in order (double quotes first)
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Triple-quoted and single-line forms combined with '^'. The single-line
# bodies here accept a backslash-escaped quote in place of a doubled quote.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a copy of quoted_string prefixed with a literal "u"
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets built with srange() from hex-escaped code point ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6205 

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement bound at module level at this point; names
# bound after this statement are not part of the list)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# Compatibility synonyms
# (camelCase names bound to the same objects as their snake_case counterparts,
# or wrapped with replaced_by_pep8 for the function-valued ones)
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on

6226# fmt: on