Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 45%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2671 statements  

1# 

2# core.py 

3# 

4from __future__ import annotations 

5 

6import collections.abc 

7from collections import deque 

8import os 

9import typing 

10from typing import ( 

11 Any, 

12 Callable, 

13 Generator, 

14 NamedTuple, 

15 Sequence, 

16 TextIO, 

17 Union, 

18 cast, 

19) 

20from abc import ABC, abstractmethod 

21from enum import Enum 

22import string 

23import copy 

24import warnings 

25import re 

26import sys 

27from collections.abc import Iterable 

28import traceback 

29import types 

30from operator import itemgetter 

31from functools import wraps 

32from threading import RLock 

33from pathlib import Path 

34 

35from .util import ( 

36 _FifoCache, 

37 _UnboundedCache, 

38 __config_flags, 

39 _collapse_string_to_ranges, 

40 _escape_regex_range_chars, 

41 _flatten, 

42 LRUMemo as _LRUMemo, 

43 UnboundedMemo as _UnboundedMemo, 

44 replaced_by_pep8, 

45) 

46from .exceptions import * 

47from .actions import * 

48from .results import ParseResults, _ParseResultsWithOffset 

49from .unicode import pyparsing_unicode 

50 

51_MAX_INT = sys.maxsize 

52str_type: tuple[type, ...] = (str, bytes) 

53 

54# 

55# Copyright (c) 2003-2022 Paul T. McGuire 

56# 

57# Permission is hereby granted, free of charge, to any person obtaining 

58# a copy of this software and associated documentation files (the 

59# "Software"), to deal in the Software without restriction, including 

60# without limitation the rights to use, copy, modify, merge, publish, 

61# distribute, sublicense, and/or sell copies of the Software, and to 

62# permit persons to whom the Software is furnished to do so, subject to 

63# the following conditions: 

64# 

65# The above copyright notice and this permission notice shall be 

66# included in all copies or substantial portions of the Software. 

67# 

68# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

69# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

70# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

71# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

72# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

73# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

74# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

75# 

76 

77from functools import cached_property 

78 

79 

class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # label for this group of flags; presumably used by the __config_flags
    # base class in its messages - TODO confirm against util.__config_flags
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public (non-underscore) name defined in the class body so far,
    # i.e. the names of the flags declared above
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # whitespace-separated flag names, split into a list; NOTE(review): looks
    # like these are the flags the base class refuses to change - confirm
    _fixed_names = """
        collect_all_And_tokens
        """.split()

101 

102 

class __diag__(__config_flags):
    # Holder for the global diagnostic flags; every flag starts out disabled.
    # The flag names mirror the members of the Diagnostics enum, which
    # enable_diag()/disable_diag() use to look the flags up by name.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # all public names defined above (the flags), then split by prefix into
    # warning flags and debug flags
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        # enable every flag whose name starts with "warn"; the
        # "enable_debug..." flags are left untouched
        for name in cls._warning_names:
            cls.enable(name)

123 

124 

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - flag to enable warnings about using the
      ``'<<'`` operator together with a :class:`MatchFirst` expression (presumably the
      ``fwd << a | b`` operator-precedence mistake - confirm against :class:`Forward`)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # member names must match the flag names declared on __diag__, because
    # enable_diag()/disable_diag() pass ``member.name`` straight through
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

157 

158 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag.

    ``diag_enum`` is a member of :class:`Diagnostics`; the flag with the same
    name is enabled on the module's diagnostic settings.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

164 

165 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag.

    ``diag_enum`` is a member of :class:`Diagnostics`; the flag with the same
    name is disabled on the module's diagnostic settings.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

171 

172 

def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning (see :class:`Diagnostics`).

    Only the ``warn...`` flags are affected; this simply delegates to the
    diagnostic settings class.
    """
    return __diag__.enable_all_warnings()

178 

179 

180# hide abstract class 

181del __config_flags 

182 

183 

def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether pyparsing's diagnostic warnings should be switched on.

    ``warn_env_var`` supplies the starting answer (any non-empty value means
    "enable"). Each entry of ``cmd_line_warn_options`` is then read as a
    colon-separated ``action:message:category:module:line`` warning filter and
    applied in order, so later options override earlier ones:

    - an action starting with "i" (ignore) that names no module, or names
      ``pyparsing``, disables the warnings
    - any other action enables them when the filter is either completely
      unqualified (no message, category or module) or targets ``pyparsing``
    """
    enabled = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with empty fields so short filters such as "error" still split
        # into at least five parts
        fields = (option + "::::").split(":")
        action, message, category, module = fields[:4]

        if action.lower().startswith("i"):
            if module in ("pyparsing", ""):
                enabled = False
        elif module == "pyparsing" or not (message or category or module):
            enabled = True
    return enabled

199 

200 

# At import time, switch on all diagnostic warnings if either the interpreter's
# -W options (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS environment
# variable ask for them.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

205 

206 

207# build list of single arg builtins, that can be used as parse actions 

208# fmt: off 

209_single_arg_builtins = { 

210 sum, len, sorted, reversed, list, tuple, set, any, all, min, max 

211} 

212# fmt: on 

213 

# alias for the built-in generator type
_generatorType = types.GeneratorType
# shape returned by ParserElement.parseImpl: (new location, matched tokens)
ParseImplReturnType = tuple[int, Any]
# presumably the return type of postParse hooks - TODO confirm against subclasses
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition predicate may accept 0 to 3 trailing arguments of
# (instring, loc, tokens) and must return a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action signature: (instring, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug "try" action signature: (instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# debug "match" action signature: (instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# debug "fail" action signature: (instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

230 

231 

# Convenience character sets for building Word-style expressions.
# ASCII letters, upper case first
alphas: str = string.ascii_uppercase + string.ascii_lowercase
# identifier start / body characters, taken from the Latin1 unicode set
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
# decimal digits
nums: str = "0123456789"
# hexadecimal digits, both letter cases
hexnums: str = nums + "ABCDEFabcdef"
# ASCII letters followed by digits
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])

239 

240 

class _ParseActionIndexError(Exception):
    """
    Internal carrier for an IndexError that escaped from a user parse action.

    Wrapping keeps such errors distinguishable from IndexErrors raised by
    ParserElement parseImpl methods, which are handled differently.
    """

    def __init__(self, msg: str, exc: BaseException) -> None:
        # msg: description of where the error happened
        # exc: the original exception raised by the parse action
        self.msg, self.exc = msg, exc

251 

252 

# Module-level caches filled in by the first call to _trim_arity():
# - _trim_arity_call_line: the stack entry captured inside _trim_arity
# - pa_call_line_synth: (filename, line number) of the line in _trim_arity's
#   wrapper that calls the user's parse action
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()

255 

256 

def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is always called with the full argument list and
    forwards only as many trailing arguments as ``func`` accepts. The arity is
    discovered at call time: the wrapper first passes every argument, and each
    time the call itself fails with a TypeError it drops one more leading
    argument (at most ``max_limit`` of them) and retries. Once a call succeeds
    the discovered offset is reused for all later calls.

    Built-ins listed in ``_single_arg_builtins`` are special-cased and are
    always called with just the tokens argument.

    An IndexError raised by ``func`` is re-raised as _ParseActionIndexError.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments to drop, and whether that number is final
    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # the TypeError is an arity mismatch only if the last
                    # inspected traceback frame is the call line above
                    # (same file and line number); a TypeError raised inside
                    # func has a deeper frame and does not match
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            # drop one more leading argument and retry
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

319 

320 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a boolean predicate so that it can be used wherever a parse action
    is expected. This is useful where :class:`ParserElement.add_condition`
    is not available (such as when attaching a condition to an operator level
    in :class:`infix_notation`).

    The returned parse action calls ``fn`` and raises when the result is falsy;
    it never modifies the tokens.

    Optional keyword arguments:

    - ``message`` - custom text for the raised exception; defaults to
      ``"failed user-defined condition"`` when ``None``
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise raise :class:`ParseException`

    """
    failure_text = "failed user-defined condition" if message is None else message
    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException
    # let the predicate be written with 0 to 3 arguments
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return None
        raise failure_type(s, l, failure_text)

    return pa

347 

348 

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default debug action run when a match of ``expr`` is about to be attempted.

    Prints the expression and location (with line and column), then the
    current input line, then a caret under the column being tried. The output
    is prefixed with "*" when ``cache_hit`` is true.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report_lines = [
        f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})",
        f"  {line(loc, instring)}",
        f" {'^':>{col(loc, instring)}}",
    ]
    print("\n".join(report_lines))

360 

361 

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default debug action run after ``expr`` has matched.

    Prints the expression and the matched tokens as a plain list, prefixed
    with "*" when ``cache_hit`` is true. ``instring``, ``startloc`` and
    ``endloc`` are accepted for signature compatibility but not used.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}")

372 

373 

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default debug action run when matching ``expr`` raised an exception.

    Prints the expression, the exception class name and the exception text,
    prefixed with "*" when ``cache_hit`` is true. ``instring`` and ``loc``
    are accepted for signature compatibility but not used.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}")

383 

384 

def null_debug_action(*args):
    """No-op debug action: accepts any arguments and does nothing, so it can
    be installed to silence debugging output during parsing."""
    return None

387 

388 

389class ParserElement(ABC): 

390 """Abstract base level parser element class.""" 

391 

392 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

393 verbose_stacktrace: bool = False 

394 _literalStringClass: type = None # type: ignore[assignment] 

395 

396 @staticmethod 

397 def set_default_whitespace_chars(chars: str) -> None: 

398 r""" 

399 Overrides the default whitespace chars 

400 

401 Example:: 

402 

403 # default whitespace chars are space, <TAB> and newline 

404 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] 

405 

406 # change to just treat newline as significant 

407 ParserElement.set_default_whitespace_chars(" \t") 

408 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def'] 

409 """ 

410 ParserElement.DEFAULT_WHITE_CHARS = chars 

411 

412 # update whitespace all parse expressions defined in this module 

413 for expr in _builtin_exprs: 

414 if expr.copyDefaultWhiteChars: 

415 expr.whiteChars = set(chars) 

416 

417 @staticmethod 

418 def inline_literals_using(cls: type) -> None: 

419 """ 

420 Set class to be used for inclusion of string literals into a parser. 

421 

422 Example:: 

423 

424 # default literal class used is Literal 

425 integer = Word(nums) 

426 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

427 

428 date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31'] 

429 

430 

431 # change to Suppress 

432 ParserElement.inline_literals_using(Suppress) 

433 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

434 

435 date_str.parse_string("1999/12/31") # -> ['1999', '12', '31'] 

436 """ 

437 ParserElement._literalStringClass = cls 

438 

439 @classmethod 

440 def using_each(cls, seq, **class_kwargs): 

441 """ 

442 Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq. 

443 

444 Example:: 

445 

446 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") 

447 

448 .. versionadded:: 3.1.0 

449 """ 

450 yield from (cls(obj, **class_kwargs) for obj in seq) 

451 

    class DebugActions(NamedTuple):
        # Per-expression debug callbacks; a field left as None means no
        # callback is invoked for that event.
        # called before a match is attempted
        debug_try: typing.Optional[DebugStartAction]
        # called after a successful match
        debug_match: typing.Optional[DebugSuccessAction]
        # called when matching raised an exception
        debug_fail: typing.Optional[DebugExceptionAction]

456 

457 def __init__(self, savelist: bool = False) -> None: 

458 self.parseAction: list[ParseAction] = list() 

459 self.failAction: typing.Optional[ParseFailAction] = None 

460 self.customName: str = None # type: ignore[assignment] 

461 self._defaultName: typing.Optional[str] = None 

462 self.resultsName: str = None # type: ignore[assignment] 

463 self.saveAsList = savelist 

464 self.skipWhitespace = True 

465 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 

466 self.copyDefaultWhiteChars = True 

467 # used when checking for left-recursion 

468 self._may_return_empty = False 

469 self.keepTabs = False 

470 self.ignoreExprs: list[ParserElement] = list() 

471 self.debug = False 

472 self.streamlined = False 

473 # optimize exception handling for subclasses that don't advance parse index 

474 self.mayIndexError = True 

475 self.errmsg: Union[str, None] = "" 

476 # mark results names as modal (report only last) or cumulative (list all) 

477 self.modalResults = True 

478 # custom debug actions 

479 self.debugActions = self.DebugActions(None, None, None) 

480 # avoid redundant calls to preParse 

481 self.callPreparse = True 

482 self.callDuringTry = False 

483 self.suppress_warnings_: list[Diagnostics] = [] 

484 self.show_in_diagram = True 

485 

    @property
    def mayReturnEmpty(self):
        # camelCase accessor for the ``_may_return_empty`` attribute
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        # writes through to the same ``_may_return_empty`` attribute
        self._may_return_empty = value

493 

494 def suppress_warning(self, warning_type: Diagnostics) -> ParserElement: 

495 """ 

496 Suppress warnings emitted for a particular diagnostic on this expression. 

497 

498 Example:: 

499 

500 base = pp.Forward() 

501 base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward) 

502 

503 # statement would normally raise a warning, but is now suppressed 

504 print(base.parse_string("x")) 

505 

506 """ 

507 self.suppress_warnings_.append(warning_type) 

508 return self 

509 

510 def visit_all(self): 

511 """General-purpose method to yield all expressions and sub-expressions 

512 in a grammar. Typically just for internal use. 

513 """ 

514 to_visit = deque([self]) 

515 seen = set() 

516 while to_visit: 

517 cur = to_visit.popleft() 

518 

519 # guard against looping forever through recursive grammars 

520 if cur in seen: 

521 continue 

522 seen.add(cur) 

523 

524 to_visit.extend(cur.recurse()) 

525 yield cur 

526 

527 def copy(self) -> ParserElement: 

528 """ 

529 Make a copy of this :class:`ParserElement`. Useful for defining 

530 different parse actions for the same parsing pattern, using copies of 

531 the original parse element. 

532 

533 Example:: 

534 

535 integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) 

536 integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K") 

537 integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

538 

539 print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M")) 

540 

541 prints:: 

542 

543 [5120, 100, 655360, 268435456] 

544 

545 Equivalent form of ``expr.copy()`` is just ``expr()``:: 

546 

547 integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

548 """ 

549 cpy = copy.copy(self) 

550 cpy.parseAction = self.parseAction[:] 

551 cpy.ignoreExprs = self.ignoreExprs[:] 

552 if self.copyDefaultWhiteChars: 

553 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 

554 return cpy 

555 

556 def set_results_name( 

557 self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False 

558 ) -> ParserElement: 

559 """ 

560 Define name for referencing matching tokens as a nested attribute 

561 of the returned parse results. 

562 

563 Normally, results names are assigned as you would assign keys in a dict: 

564 any existing value is overwritten by later values. If it is necessary to 

565 keep all values captured for a particular results name, call ``set_results_name`` 

566 with ``list_all_matches`` = True. 

567 

568 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; 

569 this is so that the client can define a basic element, such as an 

570 integer, and reference it in multiple places with different names. 

571 

572 You can also set results names using the abbreviated syntax, 

573 ``expr("name")`` in place of ``expr.set_results_name("name")`` 

574 - see :class:`__call__`. If ``list_all_matches`` is required, use 

575 ``expr("name*")``. 

576 

577 Example:: 

578 

579 integer = Word(nums) 

580 date_str = (integer.set_results_name("year") + '/' 

581 + integer.set_results_name("month") + '/' 

582 + integer.set_results_name("day")) 

583 

584 # equivalent form: 

585 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

586 """ 

587 listAllMatches = listAllMatches or list_all_matches 

588 return self._setResultsName(name, listAllMatches) 

589 

590 def _setResultsName(self, name, list_all_matches=False) -> ParserElement: 

591 if name is None: 

592 return self 

593 newself = self.copy() 

594 if name.endswith("*"): 

595 name = name[:-1] 

596 list_all_matches = True 

597 newself.resultsName = name 

598 newself.modalResults = not list_all_matches 

599 return newself 

600 

601 def set_break(self, break_flag: bool = True) -> ParserElement: 

602 """ 

603 Method to invoke the Python pdb debugger when this element is 

604 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to 

605 disable. 

606 """ 

607 if break_flag: 

608 _parseMethod = self._parse 

609 

610 def breaker(instring, loc, do_actions=True, callPreParse=True): 

611 # this call to breakpoint() is intentional, not a checkin error 

612 breakpoint() 

613 return _parseMethod(instring, loc, do_actions, callPreParse) 

614 

615 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined] 

616 self._parse = breaker # type: ignore [method-assign] 

617 elif hasattr(self._parse, "_originalParseMethod"): 

618 self._parse = self._parse._originalParseMethod # type: ignore [method-assign] 

619 return self 

620 

621 def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement: 

622 """ 

623 Define one or more actions to perform when successfully matching parse element definition. 

624 

625 Parse actions can be called to perform data conversions, do extra validation, 

626 update external data structures, or enhance or replace the parsed tokens. 

627 Each parse action ``fn`` is a callable method with 0-3 arguments, called as 

628 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: 

629 

630 - ``s`` = the original string being parsed (see note below) 

631 - ``loc`` = the location of the matching substring 

632 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object 

633 

634 The parsed tokens are passed to the parse action as ParseResults. They can be 

635 modified in place using list-style append, extend, and pop operations to update 

636 the parsed list elements; and with dictionary-style item set and del operations 

637 to add, update, or remove any named results. If the tokens are modified in place, 

638 it is not necessary to return them with a return statement. 

639 

640 Parse actions can also completely replace the given tokens, with another ``ParseResults`` 

641 object, or with some entirely different object (common for parse actions that perform data 

642 conversions). A convenient way to build a new parse result is to define the values 

643 using a dict, and then create the return value using :class:`ParseResults.from_dict`. 

644 

645 If None is passed as the ``fn`` parse action, all previously added parse actions for this 

646 expression are cleared. 

647 

648 Optional keyword arguments: 

649 

650 - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during 

651 lookaheads and alternate testing. For parse actions that have side effects, it is 

652 important to only call the parse action once it is determined that it is being 

653 called as part of a successful parse. For parse actions that perform additional 

654 validation, then call_during_try should be passed as True, so that the validation 

655 code is included in the preliminary "try" parses. 

656 

657 Note: the default parsing behavior is to expand tabs in the input string 

658 before starting the parsing process. See :class:`parse_string` for more 

659 information on parsing strings containing ``<TAB>`` s, and suggested 

660 methods to maintain a consistent view of the parsed string, the parse 

661 location, and line and column positions within the parsed string. 

662 

663 Example:: 

664 

665 # parse dates in the form YYYY/MM/DD 

666 

667 # use parse action to convert toks from str to int at parse time 

668 def convert_to_int(toks): 

669 return int(toks[0]) 

670 

671 # use a parse action to verify that the date is a valid date 

672 def is_valid_date(instring, loc, toks): 

673 from datetime import date 

674 year, month, day = toks[::2] 

675 try: 

676 date(year, month, day) 

677 except ValueError: 

678 raise ParseException(instring, loc, "invalid date given") 

679 

680 integer = Word(nums) 

681 date_str = integer + '/' + integer + '/' + integer 

682 

683 # add parse actions 

684 integer.set_parse_action(convert_to_int) 

685 date_str.set_parse_action(is_valid_date) 

686 

687 # note that integer fields are now ints, not strings 

688 date_str.run_tests(''' 

689 # successful parse - note that integer fields were converted to ints 

690 1999/12/31 

691 

692 # fail - invalid date 

693 1999/13/31 

694 ''') 

695 """ 

696 if list(fns) == [None]: 

697 self.parseAction.clear() 

698 return self 

699 

700 if not all(callable(fn) for fn in fns): 

701 raise TypeError("parse actions must be callable") 

702 self.parseAction[:] = [_trim_arity(fn) for fn in fns] 

703 self.callDuringTry = kwargs.get( 

704 "call_during_try", kwargs.get("callDuringTry", False) 

705 ) 

706 

707 return self 

708 

709 def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement: 

710 """ 

711 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. 

712 

713 See examples in :class:`copy`. 

714 """ 

715 self.parseAction += [_trim_arity(fn) for fn in fns] 

716 self.callDuringTry = self.callDuringTry or kwargs.get( 

717 "call_during_try", kwargs.get("callDuringTry", False) 

718 ) 

719 return self 

720 

721 def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement: 

722 """Add a boolean predicate function to expression's list of parse actions. See 

723 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``, 

724 functions passed to ``add_condition`` need to return boolean success/fail of the condition. 

725 

726 Optional keyword arguments: 

727 

728 - ``message`` = define a custom message to be used in the raised exception 

729 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise 

730 ParseException 

731 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls, 

732 default=False 

733 

734 Example:: 

735 

736 integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) 

737 year_int = integer.copy() 

738 year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") 

739 date_str = year_int + '/' + integer + '/' + integer 

740 

741 result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), 

742 (line:1, col:1) 

743 """ 

744 for fn in fns: 

745 self.parseAction.append( 

746 condition_as_parse_action( 

747 fn, 

748 message=str(kwargs.get("message")), 

749 fatal=bool(kwargs.get("fatal", False)), 

750 ) 

751 ) 

752 

753 self.callDuringTry = self.callDuringTry or kwargs.get( 

754 "call_during_try", kwargs.get("callDuringTry", False) 

755 ) 

756 return self 

757 

758 def set_fail_action(self, fn: ParseFailAction) -> ParserElement: 

759 """ 

760 Define action to perform if parsing fails at this expression. 

761 Fail acton fn is a callable function that takes the arguments 

762 ``fn(s, loc, expr, err)`` where: 

763 

764 - ``s`` = string being parsed 

765 - ``loc`` = location where expression match was attempted and failed 

766 - ``expr`` = the parse expression that failed 

767 - ``err`` = the exception thrown 

768 

769 The function returns no value. It may throw :class:`ParseFatalException` 

770 if it is desired to stop parsing immediately.""" 

771 self.failAction = fn 

772 return self 

773 

774 def _skipIgnorables(self, instring: str, loc: int) -> int: 

775 if not self.ignoreExprs: 

776 return loc 

777 exprsFound = True 

778 ignore_expr_fns = [e._parse for e in self.ignoreExprs] 

779 last_loc = loc 

780 while exprsFound: 

781 exprsFound = False 

782 for ignore_fn in ignore_expr_fns: 

783 try: 

784 while 1: 

785 loc, dummy = ignore_fn(instring, loc) 

786 exprsFound = True 

787 except ParseException: 

788 pass 

789 # check if all ignore exprs matched but didn't actually advance the parse location 

790 if loc == last_loc: 

791 break 

792 last_loc = loc 

793 return loc 

794 

795 def preParse(self, instring: str, loc: int) -> int: 

796 if self.ignoreExprs: 

797 loc = self._skipIgnorables(instring, loc) 

798 

799 if self.skipWhitespace: 

800 instrlen = len(instring) 

801 white_chars = self.whiteChars 

802 while loc < instrlen and instring[loc] in white_chars: 

803 loc += 1 

804 

805 return loc 

806 

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Base implementation: matches nothing, consumes nothing. Returns the
        # unchanged location and an empty token list.
        # NOTE(review): presumably overridden by concrete expression classes - confirm
        return loc, []

809 

    def postParse(self, instring, loc, tokenlist):
        # Hook called by _parseNoCache on the tokens returned from parseImpl,
        # before they are wrapped in ParseResults; the base version returns
        # the tokens unchanged.
        return tokenlist

812 

813 # @profile 

def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Match this element against ``instring`` at ``loc`` without using the
    packrat cache.

    Steps, in order: optional ``preParse`` skipping, ``parseImpl``,
    ``postParse``, wrapping the tokens in a :class:`ParseResults`, and then
    running any parse actions.  When ``self.debug`` is set, the configured
    debug actions are invoked around those steps; when ``self.failAction``
    is set it is invoked if matching raises.

    :param instring: the string being parsed
    :param loc: location at which to start matching
    :param do_actions: run parse actions (actions still run when this is
        false if ``self.callDuringTry`` is set)
    :param callPreParse: call ``preParse`` first (only if
        ``self.callPreparse`` is also set)
    :returns: ``(end location, ParseResults)``
    :raises ParseException: also raised in place of an ``IndexError`` coming
        out of ``parseImpl`` or out of a parse action
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # Slow path: same matching logic as the ``else`` branch below, but
        # wrapped so that debug/fail actions see any exception.
        #
        # Pre-bind tokens_start so the ``except`` handler below can always
        # report a location, even if preParse itself raises before the
        # regular assignment is reached (previously an UnboundLocalError
        # would have masked the original exception).
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                # parseImpl may index past the end of the string; report
                # that as a normal parse failure at end of input
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # Fast path: no debug or fail actions to notify
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns something other than None
                    # or the results object itself replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # let the debug fail action observe errors raised by user
                # parse actions before propagating them
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

914 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the resulting end location.

    Parse actions are skipped unless ``do_actions`` is true.  A
    ``ParseFatalException`` is re-raised as-is when ``raise_fatal`` is true;
    otherwise it is replaced by a plain ``ParseException`` built from this
    element's ``errmsg`` at ``loc``.
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
        return outcome[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise

929 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this element matches ``instring`` at ``loc``.

    Returns ``False`` if ``try_parse`` raises ``ParseException`` or
    ``IndexError``, ``True`` if it completes.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True

937 

# cache for left-recursion in Forward references; emptied by reset_cache()
# and replaced with a memo object by enable_left_recursion()
# NOTE(review): the (int, Forward, bool) key is presumably
# (loc, forward_expr, do_actions) - confirm against Forward.parseImpl
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}

943 

class _CacheType(typing.Protocol):
    """
    Class to be used for packrat and left-recursion caching of results
    and exceptions.
    """

    # sentinel that _parseCache compares (by identity) against the value
    # returned from get() to detect a cache miss
    not_in_cache: bool

    # look up a cached value
    def get(self, *args) -> typing.Any: ...

    # store a value in the cache
    def set(self, *args) -> None: ...

    # discard all cached values
    def clear(self) -> None: ...

957 

class NullCache(dict):
    """
    A null cache type for initialization of the packrat_cache class variable.
    If/when enable_packrat() is called, this null cache will be replaced by a
    proper _CacheType class instance.
    """

    not_in_cache: bool = True

    # the overrides below are deliberate no-ops (each returns None), so this
    # object never stores or returns anything despite subclassing dict
    def get(self, *args) -> typing.Any: ...

    def set(self, *args) -> None: ...

    def clear(self) -> None: ...

972 

# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
packrat_cache: _CacheType = NullCache()
# guards the cache and the enable/disable/reset operations on it
packrat_cache_lock = RLock()
# [hits, misses] counters, indexed with HIT=0 / MISS=1 in _parseCache
packrat_cache_stats = [0, 0]

978 

979 # this method gets repeatedly called during backtracking with the same arguments - 

980 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 

def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Packrat-memoized wrapper around :meth:`_parseNoCache`.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``.
    On a miss the parse is performed and either the result, stored as
    ``(end_loc, results_copy, start_loc)``, or a traceback-free copy of the
    raised ``ParseBaseException`` is cached.  On a hit the cached exception
    is re-raised, or a fresh copy of the cached results is returned.

    Hit and miss counts are accumulated in
    ``ParserElement.packrat_cache_stats``.  The whole lookup runs while
    holding ``ParserElement.packrat_cache_lock``.

    :returns: ``(end location, ParseResults)``
    :raises ParseBaseException: when the (possibly cached) parse failed
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store a copy so later mutation of the returned results
                # cannot alter the cached entry
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            # NOTE(review): the TypeError guards below presumably tolerate
            # debug actions written without a ``cache_hit`` keyword - confirm
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(tuple[int, ParseResults, int], value)
            # cached layout is (end_loc, results, start_loc) - see cache.set above
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # pass (start, end) in the same order _parseNoCache does;
                    # the previous code passed them swapped
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result

1028 

# active parse entry point: rebound to _parseCache by enable_packrat() and
# back to _parseNoCache by disable_memoization()
_parse = _parseNoCache

1030 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters, and empty the
    left-recursion memo table, while holding ``packrat_cache_lock``.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        cls.packrat_cache.clear()
        counters = cls.packrat_cache_stats
        # reset in place so existing references to the list stay valid
        counters[:] = [0] * len(counters)
        cls.recursion_memos.clear()

1039 

# class attributes to keep caching status; enable_packrat() and
# enable_left_recursion() each refuse to run while the other's flag is set
# (unless called with force=True)
_packratEnabled = False
_left_recursion_enabled = False

1043 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing and discard their memoized data.

    Calling this when neither Packrat nor Left Recursion is enabled is
    harmless, so it can be used before activating either one to clear any
    previous settings.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        cls.reset_cache()
        cls._left_recursion_enabled = False
        cls._packratEnabled = False
        # route all parsing back through the uncached implementation
        cls._parse = cls._parseNoCache

1058 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, left-recursive :class:`Forward`
    elements are matched repeatedly with a fixed recursion depth that is
    increased step by step until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-evaluated during backtracking. Programs whose parse
    actions depend on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing is similar but not identical to Packrat parsing,
    so the two cannot be active together. Pass ``force=True`` to disable any
    previous, conflicting settings.

    Raises ``RuntimeError`` if Packrat is enabled and ``force`` is false, and
    ``NotImplementedError`` for a ``cache_size_limit`` that is neither
    ``None`` nor greater than zero.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo_table = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo_table = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo_table  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True

1107 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which memoizes the parsing logic.
    Repeated parse attempts at the same string location (common in many
    complex grammars) then return a cached value immediately instead of
    re-running the parsing/validating code. Both successful results and
    parsing exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is first
    imported. To activate it, call :class:`ParserElement.enable_packrat`;
    for best results, do so immediately after importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar but not identical to Bounded Recursion parsing,
    so the two cannot be active together. Pass ``force=True`` to disable any
    previous, conflicting settings.

    Calling this while packrat is already enabled (and ``force`` is false)
    does nothing. Raises ``RuntimeError`` if Bounded Recursion is enabled and
    ``force`` is false.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if not ParserElement._packratEnabled:
            ParserElement._packratEnabled = True
            ParserElement.packrat_cache = (
                _UnboundedCache()
                if cache_size_limit is None
                else _FifoCache(cache_size_limit)
            )
            # route all parsing through the memoizing implementation
            ParserElement._parse = ParserElement._parseCache

1157 

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` from its start using this parser definition. This is
    the primary entry point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the whole input matches the grammar, set ``parse_all`` to ``True``. This is equivalent
    to ending the grammar with :class:`StringEnd`\\ ().

    Unless ``keepTabs`` is set on the parser, ``parse_string`` works on a copy of the input in which tabs
    have been expanded with ``str.expandtabs()`` (8 spaces per tab by default), so that reported column
    numbers are correct. If the input contains tabs and a parse action uses its ``loc`` argument to index
    into the string being parsed, keep a consistent view of the input by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_match_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if must_match_all:
            # skip trailing whitespace/ignorables, then require end of text
            end_loc = self.preParse(instring, end_loc)
            end_of_text = Empty() + StringEnd().set_debug(False)
            end_of_text._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # re-raise from here so pyparsing's internal stack trace is dropped
        raise exc.with_traceback(None)

    return tokens

1228 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found.  If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    :param instring: the string to scan
    :param max_matches: stop after this many matches
    :param overlap: if true, resume scanning inside the previous match
        instead of at its end
    :param always_skip_whitespace: if true (the default), leading text is
        skipped using an ``Empty()`` that shares this expression's
        ``ignoreExprs`` and ``whiteChars``; otherwise this expression's own
        ``preParse`` is used
    :param debug: if true, print a dict with the tokens, start and end of
        every match as it is found
    :param maxMatches: pre-PEP8 name for ``max_matches``; the smaller of the
        two values is used

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    # use the PEP8 names, as parse_string and transform_string already do
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # distinct name: the original local ``nextloc``
                        # differed from ``nextLoc`` only by letter case
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc; step forward so the
                    # scan cannot stall
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1326 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matched text using the
    tokens returned from parse actions. Define a grammar, attach a parse
    action that modifies the returned token list, then call
    ``transform_string()`` on the target string: each match is replaced by
    its (possibly modified) tokens, unmatched text is copied through, and the
    resulting string is returned.

    Note that this sets ``keepTabs`` to ``True`` on this expression.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # keep <TAB>s intact so the string is altered as little as possible and
    # locations stay in step between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy through the unmatched text before this match
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if toks:
                if isinstance(toks, ParseResults):
                    pieces += toks.as_list()
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)

        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(non_empty))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # re-raise from here so pyparsing's internal stack trace is dropped
        raise exc.with_traceback(None)

1377 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens
    of every match into a single :class:`ParseResults`. The optional
    ``max_matches`` argument clips the search after 'n' matches are found.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    maxMatches = min(maxMatches, max_matches)
    try:
        scanner = self.scan_string(
            instring, maxMatches, always_skip_whitespace=False, debug=debug
        )
        found = [toks for toks, _start, _end in scanner]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # re-raise from here so pyparsing's internal stack trace is dropped
        raise exc.with_traceback(None)

1422 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, treating matches of this expression as
    the separators. ``maxsplit`` limits the number of splits, and
    ``include_separators`` (default= ``False``) controls whether the first
    token of each separator match is also yielded between the pieces.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator (possibly the empty string)
    yield instring[cursor:]

1454 

def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. A string operand
    is converted using this element's ``_literalStringClass``
    (:class:`Literal` by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1490 

def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` yields ``SkipTo(expr)("_skipped*") + expr``; a string
    operand is first converted with ``_literalStringClass``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1503 

def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string operand is first converted with ``_literalStringClass``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1513 

def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string operand is first converted with ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1523 

def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    ``...`` (Ellipsis) is accepted wherever ``None`` is, and a bare ``...``
    is treated as ``(0, None)``.

    Returns ``NotImplemented`` for operands that are neither an ``int`` nor
    a tuple of ints/``None``; raises ``ValueError`` for a negative count or
    for a maximum that is smaller than the minimum.
    """
    # normalize Ellipsis forms: ``...`` -> (0, None), ``(..., n)`` -> (0, n)
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        minElements, optElements = other, 0
    else:
        # replace any remaining Ellipsis with None, then pad/clip to 2 items
        other = tuple(o if o is not Ellipsis else None for o in other)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            # open-ended repetition: "at least other[0]"
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the count of optional repeats beyond the minimum
            optElements -= minElements
        else:
            return NotImplemented

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if minElements == optElements == 0:
        return And([])

    if optElements:

        # builds nested optionals: Opt(self + Opt(self + ... Opt(self)))
        def makeOptionalList(n):
            if n > 1:
                return Opt(self + makeOptionalList(n - 1))
            else:
                return Opt(self)

        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret

1603 

def __rmul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator when left operand is not a
    :class:`ParserElement`; delegates directly to ``__mul__``.
    """
    return self.__mul__(other)

1606 

def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    ``expr | ...`` returns a pending skip that must skip, and a non-empty
    string operand is converted with ``_literalStringClass``.

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    alternative = other
    if isinstance(alternative, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if alternative == "":
            return Opt(self)
        alternative = self._literalStringClass(alternative)

    if isinstance(alternative, ParserElement):
        return MatchFirst([self, alternative])
    return NotImplemented

1625 

def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string operand is first converted with ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1635 

def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`.

    A string operand is first converted with ``_literalStringClass``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1645 

def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    A string operand is first converted with ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1655 

def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`.

    A string operand is first converted with ``_literalStringClass``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1665 

def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    A string operand is first converted with ``_literalStringClass``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1675 

def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1681 

# disable __iter__ so that Python does not fall back to the legacy protocol
# of iterating by calling __getitem__ with successive integers (here
# __getitem__ means expression repetition, not element access)
__iter__ = None

1685 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` if more than 2 index arguments are given.

    .. versionchanged:: 3.1.0
       Support for slice notation.
    """

    stop_on_defined = False
    stop_on = NoMatch()
    # slice forms carry the stop_on expression in the slice's ``stop``
    if isinstance(key, slice):
        # ``expr[count : end_expr]``
        key, stop_on = key.start, key.stop
        if key is None:
            key = ...
        stop_on_defined = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # ``expr[m, n : end_expr]`` arrives as the tuple (m, slice(n, end_expr))
        key, stop_on = (key[0], key[1].start), key[1].stop
        stop_on_defined = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # a non-iterable key n becomes (n, n), i.e. exactly n repetitions
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    ret = self * tuple(key[:2])
    # cast is for the type checker only; it has no runtime effect
    ret = typing.cast(_MultipleMatch, ret)

    if stop_on_defined:
        ret.stopOn(stop_on)

    return ret

1747 

def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    When ``name`` ends with a ``'*'`` character, ``list_all_matches`` will be
    passed as ``True``.

    When ``name`` is omitted (or ``None``), this is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    # no name given: hand back a plain copy of this expression
    if name is None:
        return self.copy()

    return self._setResultsName(name)

1767 

def suppress(self) -> ParserElement:
    """
    Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from
    cluttering up returned output.

    Returns a new :class:`Suppress` wrapping this expression.
    """
    return Suppress(self)

1774 

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Enables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any)

    Returns ``self`` so calls can be chained.
    """
    # ``recursive`` is not used by this implementation; only this
    # element's own flag is set here.
    self.skipWhitespace = True
    return self

1784 

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Disables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any)

    Returns ``self`` so calls can be chained.
    """
    # ``recursive`` is not used by this implementation; only this
    # element's own flag is cleared here.
    self.skipWhitespace = False
    return self

1795 

def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars.

    - ``chars`` - the characters to treat as whitespace for this expression
      (any iterable of single characters; stored as a ``set``)
    - ``copy_defaults`` - value stored in ``copyDefaultWhiteChars``

    Whitespace skipping is switched on as a side effect. Returns ``self``.
    """
    self.skipWhitespace = True
    # right-hand side is evaluated in full before either attribute is bound
    self.whiteChars, self.copyDefaultWhiteChars = set(chars), copy_defaults
    return self

1806 

def parse_with_tabs(self) -> ParserElement:
    """
    Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    Returns ``self`` so calls can be chained.
    """
    # flag is consumed elsewhere; presumably it suppresses the tab expansion
    self.keepTabs = True
    return self

1815 

def ignore(self, other: ParserElement) -> ParserElement:
    """
    Register an expression that should be skipped over (e.g., comments) while
    matching; may be called repeatedly to register several comment or other
    ignorable patterns.

    Example::

        patt = Word(alphas)[...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    # plain strings are promoted to a suppressed expression first
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        # wrap a copy so the caller's expression is left untouched
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        # already suppressed: add it once only
        self.ignoreExprs.append(other)
    return self

1841 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching.

    Any falsy action is replaced by the corresponding default debug action.
    Debugging is switched on for this expression as a side effect.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
    """
    make_actions = self.DebugActions
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = make_actions(on_start, on_success, on_exception)
    self.debug = True
    return self

1867 

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Turn display of debugging messages on or off while doing pattern matching.
    Pass ``flag`` as ``True`` to enable, ``False`` to disable.
    Pass ``recurse`` as ``True`` to apply the flag to every expression returned by
    ``visit_all()`` (this expression and all sub-expressions).

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if not recurse:
        if not flag:
            self.debug = False
        else:
            # installing the default actions also sets the debug flag
            self.set_debug_actions(
                _default_start_debug_action,
                _default_success_debug_action,
                _default_exception_debug_action,
            )
        return self

    # apply the flag to each visited expression individually, without
    # recursing again from each of them
    for sub_expr in self.visit_all():
        sub_expr.set_debug(flag, recurse=False)
    return self

1923 

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    Computed on first access by ``_generateDefaultName()`` and cached in
    ``_defaultName`` until that attribute is reset to ``None``.
    """
    if self._defaultName is None:
        self._defaultName = self._generateDefaultName()
    return self._defaultName

1929 

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    Returns the string to use as this expression's auto-generated name.
    """

1935 

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Define name for this expression, makes debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # rebuild the error message from the (possibly new) display name
    self.errmsg = f"Expected {str(self)}"

    if __diag__.enable_debug_on_named_expressions:
        # debug follows the name: on when a name is given, off when cleared
        self.set_debug(name is not None)

    return self

1963 

@property
def name(self) -> str:
    """Display name: the custom name if one is set, else ``default_name``."""
    # This will use a user-defined name if available, but otherwise defaults back to the auto-generated name
    return self.customName if self.customName is not None else self.default_name

@name.setter
def name(self, new_name) -> None:
    # assignment is routed through set_name()
    self.set_name(new_name)

1972 

def __str__(self) -> str:
    """Return this expression's ``name``."""
    return self.name

1975 

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)

1978 

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard its cached default name.

    Returns ``self``.
    """
    self.streamlined = True
    # clearing the cache makes ``default_name`` regenerate on next access
    self._defaultName = None
    return self

1983 

def recurse(self) -> list[ParserElement]:
    """Return the sub-expressions of this element; this implementation has none."""
    return []

1986 

def _checkRecursion(self, parseElementList):
    """
    Walk the expressions returned by ``recurse()``, passing each one the
    list of elements visited so far with ``self`` appended.
    """
    # build a fresh list so the caller's list is not mutated
    subRecCheckList = parseElementList[:] + [self]
    for e in self.recurse():
        e._checkRecursion(subRecCheckList)

1991 

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Always emits a ``DeprecationWarning``; ``validateTrace`` is not used by this
    implementation.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        DeprecationWarning,
        # attribute the warning to the caller of validate()
        stacklevel=2,
    )
    self._checkRecursion([])

2006 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run the parse expression over the contents of a file or file-like object.

    Anything with a ``read()`` method is read directly. Otherwise the argument
    is treated as a filename: the file is opened with ``encoding``, read in
    full, and closed before parsing.

    ``parse_all`` / ``parseAll`` are synonyms; parsing of the entire input is
    required if either is true.
    """
    want_all = parseAll or parse_all

    try:
        stream = typing.cast(TextIO, file_or_filename)
        text = stream.read()
    except AttributeError:
        # no usable read(): treat the argument as a path
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as infile:
            text = infile.read()

    try:
        return self.parse_string(text, want_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

2036 

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object compares equal
    - a string (``str_type``) compares equal if this expression fully matches it
    - another :class:`ParserElement` compares equal if both have equal
      instance attributes (``vars()``)
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2045 

def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)

2048 

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of a parser against a test string. Handy for simple inline
    microtests of sub expressions while building up a larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if it raises a
    ``ParseBaseException``. ``test_string`` is converted with ``str()`` first.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # both spellings default to True, so either one being False disables it
    parseAll = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=parseAll)
    except ParseBaseException:
        return False
    else:
        return True

2072 

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments are synonyms for their snake_case
    counterparts.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the snake_case arguments with their camelCase synonyms
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a single multiline string: one stripped test per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
    comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines (and blank lines that follow a comment) are collected
        # and emitted as a header for the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame after the call site is the parse action itself;
                    # the traceback may end at the call site (e.g. the call
                    # itself failed), so do not let StopIteration escape
                    next_entry = next(it, None)
                    if next_entry is not None:
                        next_f = next_entry[0]
                        tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # callback produced nothing: fall back to the normal
                        # dump, honoring full_dump like the other branches
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # callables such as functools.partial have no __name__
                    pp_name = getattr(postParse, "__name__", repr(postParse))
                    out.append(f"{pp_name} failed: {type(e).__name__}: {e}")
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2287 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for the parser and write it out as HTML.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML; a str or Path is opened for writing as UTF-8, anything else
      is written to directly via its ``write()`` method
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    diagram = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: open it ourselves, then render into it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(diagram, embed=embed, **kwargs))
        return

    # we were passed a file-like object, just write to it
    output_html.write(railroad_to_html(diagram, embed=embed, **kwargs))

2349 

# Compatibility synonyms
# Legacy camelCase names bound to the PEP 8 snake_case methods defined
# earlier in this class. Each binding goes through ``replaced_by_pep8``
# (imported from ``.util``), which presumably returns a wrapper exposing the
# new method under the old name -- confirm against ``util``. The first group
# is re-wrapped with ``staticmethod``; the second group is left as plain
# functions so they bind as instance methods.
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# ``default_name`` is a property, so it is aliased directly rather than wrapped
defaultName = default_name
# fmt: on

2384 

2385 

class _PendingSkip(ParserElement):
    # Internal placeholder that marks the spot where '...' was added to a
    # parser element. When another ParserElement is later added to it, the
    # placeholder is replaced by a SkipTo targeting that element.
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render "anchor + Empty" and show the Empty part as "..."
        return str(self.anchor + Empty()).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched: drop an empty skipped entry and its results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match: record that it was missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        return (
            self.anchor + skipper().add_parse_action(must_skip)
            | skipper().add_parse_action(show_skip)
        ) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a bare placeholder can never be parsed
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2425 

2426 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self) -> None:
        # base initializer is called with savelist=False
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # default name is the concrete class name
        return type(self).__name__

2437 

2438 

class NoMatch(Token):
    """
    A token that never matches; parsing it always fails.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2452 

2453 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Subclasses are always instantiated as themselves
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        match_string = matchString or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)

        # Default behavior
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        super().__init__()
        # camelCase synonym wins when it is non-empty
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # first-character comparison is tried before the full startswith()
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2505 

2506 

class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # both arguments are accepted but ignored; the match string is always ""
        super().__init__("")
        self._may_return_empty = True
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # consumes nothing and yields no tokens
        return loc, []

2522 

2523 

class _SingleCharLiteral(Literal):
    # Literal variant for one-character match strings: a single character
    # comparison replaces the startswith() check.
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2529 

2530 

# Register Literal as ParserElement's literal-string class; presumably the
# class used to convert bare strings in expressions -- confirm at the use sites.
ParserElement._literalStringClass = Literal

2532 

2533 

2534class Keyword(Token): 

2535 """ 

2536 Token to exactly match a specified string as a keyword, that is, 

2537 it must be immediately preceded and followed by whitespace or 

2538 non-keyword characters. Compare with :class:`Literal`: 

2539 

2540 - ``Literal("if")`` will match the leading ``'if'`` in 

2541 ``'ifAndOnlyIf'``. 

2542 - ``Keyword("if")`` will not; it will only match the leading 

2543 ``'if'`` in ``'if x=1'``, or ``'if(y==2)'`` 

2544 

2545 Accepts two optional constructor arguments in addition to the 

2546 keyword string: 

2547 

2548 - ``ident_chars`` is a string of characters that would be valid 

2549 identifier characters, defaulting to all alphanumerics + "_" and 

2550 "$" 

2551 - ``caseless`` allows case-insensitive matching, default is ``False``. 

2552 

2553 Example:: 

2554 

2555 Keyword("start").parse_string("start") # -> ['start'] 

2556 Keyword("start").parse_string("starting") # -> Exception 

2557 

2558 For case-insensitive matching, use :class:`CaselessKeyword`. 

2559 """ 

2560 

2561 DEFAULT_KEYWORD_CHARS = alphanums + "_$" 

2562 

2563 def __init__( 

2564 self, 

2565 match_string: str = "", 

2566 ident_chars: typing.Optional[str] = None, 

2567 caseless: bool = False, 

2568 *, 

2569 matchString: str = "", 

2570 identChars: typing.Optional[str] = None, 

2571 ) -> None: 

2572 super().__init__() 

2573 identChars = identChars or ident_chars 

2574 if identChars is None: 

2575 identChars = Keyword.DEFAULT_KEYWORD_CHARS 

2576 match_string = matchString or match_string 

2577 self.match = match_string 

2578 self.matchLen = len(match_string) 

2579 self.firstMatchChar = match_string[:1] 

2580 if not self.firstMatchChar: 

2581 raise ValueError("null string passed to Keyword; use Empty() instead") 

2582 self.errmsg = f"Expected {type(self).__name__} {self.name}" 

2583 self._may_return_empty = False 

2584 self.mayIndexError = False 

2585 self.caseless = caseless 

2586 if caseless: 

2587 self.caselessmatch = match_string.upper() 

2588 identChars = identChars.upper() 

2589 self.identChars = set(identChars) 

2590 

2591 def _generateDefaultName(self) -> str: 

2592 return repr(self.match) 

2593 

2594 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: 

2595 errmsg = self.errmsg or "" 

2596 errloc = loc 

2597 if self.caseless: 

2598 if instring[loc : loc + self.matchLen].upper() == self.caselessmatch: 

2599 if loc == 0 or instring[loc - 1].upper() not in self.identChars: 

2600 if ( 

2601 loc >= len(instring) - self.matchLen 

2602 or instring[loc + self.matchLen].upper() not in self.identChars 

2603 ): 

2604 return loc + self.matchLen, self.match 

2605 

2606 # followed by keyword char 

2607 errmsg += ", was immediately followed by keyword character" 

2608 errloc = loc + self.matchLen 

2609 else: 

2610 # preceded by keyword char 

2611 errmsg += ", keyword was immediately preceded by keyword character" 

2612 errloc = loc - 1 

2613 # else no match just raise plain exception 

2614 

2615 elif ( 

2616 instring[loc] == self.firstMatchChar 

2617 and self.matchLen == 1 

2618 or instring.startswith(self.match, loc) 

2619 ): 

2620 if loc == 0 or instring[loc - 1] not in self.identChars: 

2621 if ( 

2622 loc >= len(instring) - self.matchLen 

2623 or instring[loc + self.matchLen] not in self.identChars 

2624 ): 

2625 return loc + self.matchLen, self.match 

2626 

2627 # followed by keyword char 

2628 errmsg += ", keyword was immediately followed by keyword character" 

2629 errloc = loc + self.matchLen 

2630 else: 

2631 # preceded by keyword char 

2632 errmsg += ", keyword was immediately preceded by keyword character" 

2633 errloc = loc - 1 

2634 # else no match just raise plain exception 

2635 

2636 raise ParseException(instring, errloc, errmsg, self) 

2637 

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.

        Rebinds the class attribute ``Keyword.DEFAULT_KEYWORD_CHARS``, which
        ``Keyword.__init__`` reads when it is called without ``ident_chars``.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

2644 

    # Compatibility synonyms
    # camelCase alias for ``set_default_keyword_chars``, produced by wrapping
    # it with ``replaced_by_pep8`` and re-exposing the result as a staticmethod.
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2649 

2650 

class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.

    The token returned is always the string the expression was defined
    with, never the differently-cased text found in the input.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        literal_text = matchString or match_string
        # the parent stores the upper-cased form, which is what input is compared to
        super().__init__(literal_text.upper())
        # Preserve the defining literal.
        self.returnString = literal_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2676 

2677 

class CaselessKeyword(Keyword):
    """
    A :class:`Keyword` that ignores letter case when matching.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        # camelCase arguments win over their snake_case counterparts
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2701 

2702 

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        # an explicit max_mismatches (including 0) overrides the camelCase default
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the next ``len(match_string)`` input characters with
        ``match_string`` position by position.

        Returns the end location and a :class:`ParseResults` holding the input
        text, with ``original`` and ``mismatches`` named results.

        :raises ParseException: if too little input remains or more than
            ``maxMismatches`` positions differ; the reported location is the
            starting ``loc``.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        if maxloc <= len(instring):
            candidate = instring[start:maxloc]
            maxMismatches = self.maxMismatches
            mismatches = []

            for position, (src, mat) in enumerate(zip(candidate, match_string)):
                if self.caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(position)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The match always spans exactly len(match_string) characters,
                # so the end is maxloc; deriving it from the loop index would
                # consume one character for an empty match_string.
                results = ParseResults([candidate])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return maxloc, results

        raise ParseException(instring, loc, self.errmsg, self)

2788 

2789 

2790class Word(Token): 

2791 """Token for matching words composed of allowed character sets. 

2792 

2793 Parameters: 

2794 

2795 - ``init_chars`` - string of all characters that should be used to 

2796 match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.; 

2797 if ``body_chars`` is also specified, then this is the string of 

2798 initial characters 

2799 - ``body_chars`` - string of characters that 

2800 can be used for matching after a matched initial character as 

2801 given in ``init_chars``; if omitted, same as the initial characters 

2802 (default=``None``) 

2803 - ``min`` - minimum number of characters to match (default=1) 

2804 - ``max`` - maximum number of characters to match (default=0) 

2805 - ``exact`` - exact number of characters to match (default=0) 

2806 - ``as_keyword`` - match as a keyword (default=``False``) 

2807 - ``exclude_chars`` - characters that might be 

2808 found in the input ``body_chars`` string but which should not be 

2809 accepted for matching; useful to define a word of all 

2810 printables except for one or two characters, for instance 

2811 (default=``None``) 

2812 

2813 :class:`srange` is useful for defining custom character set strings 

2814 for defining :class:`Word` expressions, using range notation from 

2815 regular expression character sets. 

2816 

2817 A common mistake is to use :class:`Word` to match a specific literal 

2818 string, as in ``Word("Address")``. Remember that :class:`Word` 

2819 uses the string argument to define *sets* of matchable characters. 

2820 This expression would match "Add", "AAA", "dAred", or any other word 

2821 made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an 

2822 exact literal string, use :class:`Literal` or :class:`Keyword`. 

2823 

2824 pyparsing includes helper strings for building Words: 

2825 

2826 - :class:`alphas` 

2827 - :class:`nums` 

2828 - :class:`alphanums` 

2829 - :class:`hexnums` 

2830 - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255 

2831 - accented, tilded, umlauted, etc.) 

2832 - :class:`punc8bit` (non-alphabetic characters in ASCII range 

2833 128-255 - currency, symbols, superscripts, diacriticals, etc.) 

2834 - :class:`printables` (any non-whitespace character) 

2835 

2836 ``alphas``, ``nums``, and ``printables`` are also defined in several 

2837 Unicode sets - see :class:`pyparsing_unicode`. 

2838 

2839 Example:: 

2840 

2841 # a word composed of digits 

2842 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 

2843 

2844 # a word with a leading capital, and zero or more lowercase 

2845 capitalized_word = Word(alphas.upper(), alphas.lower()) 

2846 

2847 # hostnames are alphanumeric, with leading alpha, and '-' 

2848 hostname = Word(alphas, alphanums + '-') 

2849 

2850 # roman numeral (not a strict parser, accepts invalid mix of characters) 

2851 roman = Word("IVXLCDM") 

2852 

2853 # any string of non-whitespace characters, except for ',' 

2854 csv_value = Word(printables, exclude_chars=",") 

2855 

2856 :raises ValueError: If ``min`` and ``max`` are both specified 

2857 and the test ``min <= max`` fails. 

2858 

2859 .. versionchanged:: 3.1.0 

2860 Raises :exc:`ValueError` if ``min`` > ``max``. 

2861 """ 

2862 

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        """Build the character sets and length limits for the word and, when
        possible, an equivalent compiled regular expression.

        Each camelCase keyword argument overrides its snake_case counterpart
        when it is truthy.

        :raises ValueError: if the initial character string is empty, if
            ``min`` < 1, or if ``max`` is given and ``min`` > ``max``.
        """
        # legacy camelCase names take precedence over the PEP8 names
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            # excluded characters are removed from both the initial and body sets
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                # if exclusion empties the body string, the `if bodyChars:`
                # test below falls back to the initial character set
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set: body characters are the initial characters
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # NOTE(review): computed from `max` before `exact` is applied below, so
        # an `exact` length does not set maxSpecified.
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        # max == 0 means "no upper bound"
        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # `exact` overrides min and max; the local names are rebound too
            # because the regex construction below reads `min` and `max`
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        # (skipped when a space is among the characters; in that case neither
        # self.reString nor self.re is assigned here and the class-level
        # parseImpl stays in effect)
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # one character class covers the whole word, so the repeat
                # counts are the full min/max lengths
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # the leading fragment consumes the first character, so the
                # body class repeats one fewer time than min/max
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # pattern could not be compiled: keep the non-regex parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                # route parsing through the compiled regex for this instance
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

2980 

2981 def _generateDefaultName(self) -> str: 

2982 def charsAsStr(s): 

2983 max_repr_len = 16 

2984 s = _collapse_string_to_ranges(s, re_escape=False) 

2985 

2986 if len(s) > max_repr_len: 

2987 return s[: max_repr_len - 3] + "..." 

2988 

2989 return s 

2990 

2991 if self.initChars != self.bodyChars: 

2992 base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})" 

2993 else: 

2994 base = f"W:({charsAsStr(self.initChars)})" 

2995 

2996 # add length specification 

2997 if self.minLen > 1 or self.maxLen != _MAX_INT: 

2998 if self.minLen == self.maxLen: 

2999 if self.minLen == 1: 

3000 return base[2:] 

3001 else: 

3002 return base + f"{{{self.minLen}}}" 

3003 elif self.maxLen == _MAX_INT: 

3004 return base + f"{{{self.minLen},...}}" 

3005 else: 

3006 return base + f"{{{self.minLen},{self.maxLen}}}" 

3007 return base 

3008 

3009 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: 

3010 if instring[loc] not in self.initChars: 

3011 raise ParseException(instring, loc, self.errmsg, self) 

3012 

3013 start = loc 

3014 loc += 1 

3015 instrlen = len(instring) 

3016 body_chars: set[str] = self.bodyChars 

3017 maxloc = start + self.maxLen 

3018 maxloc = min(maxloc, instrlen) 

3019 while loc < maxloc and instring[loc] in body_chars: 

3020 loc += 1 

3021 

3022 throw_exception = False 

3023 if loc - start < self.minLen: 

3024 throw_exception = True 

3025 elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars: 

3026 throw_exception = True 

3027 elif self.asKeyword and ( 

3028 (start > 0 and instring[start - 1] in body_chars) 

3029 or (loc < instrlen and instring[loc] in body_chars) 

3030 ): 

3031 throw_exception = True 

3032 

3033 if throw_exception: 

3034 raise ParseException(instring, loc, self.errmsg, self) 

3035 

3036 return loc, instring[start:loc] 

3037 

3038 def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType: 

3039 result = self.re_match(instring, loc) 

3040 if not result: 

3041 raise ParseException(instring, loc, self.errmsg, self) 

3042 

3043 loc = result.end() 

3044 return loc, result.group() 

3045 

3046 

class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)``: matches
    exactly one character taken from the given string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # camelCase arguments win over their snake_case counterparts
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )

3067 

3068 

3069class Regex(Token): 

3070 r"""Token for matching strings that match a given regular 

3071 expression. Defined with string specifying the regular expression in 

3072 a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_. 

3073 If the given regex contains named groups (defined using ``(?P<name>...)``), 

3074 these will be preserved as named :class:`ParseResults`. 

3075 

3076 If instead of the Python stdlib ``re`` module you wish to use a different RE module 

3077 (such as the ``regex`` module), you can do so by building your ``Regex`` object with 

3078 a compiled RE that was compiled using ``regex``. 

3079 

3080 The parameters ``pattern`` and ``flags`` are passed 

3081 to the ``re.compile()`` function as-is. See the Python 

3082 `re module <https://docs.python.org/3/library/re.html>`_ module for an 

3083 explanation of the acceptable patterns and flags. 

3084 

3085 Example:: 

3086 

3087 realnum = Regex(r"[+-]?\d+\.\d*") 

3088 # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression 

3089 roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") 

3090 

3091 # named fields in a regex will be returned as named results 

3092 date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)') 

3093 

3094 # the Regex class will accept re's compiled using the regex module 

3095 import regex 

3096 parser = pp.Regex(regex.compile(r'[0-9]')) 

3097 """ 

3098 

3099 def __init__( 

3100 self, 

3101 pattern: Any, 

3102 flags: Union[re.RegexFlag, int] = 0, 

3103 as_group_list: bool = False, 

3104 as_match: bool = False, 

3105 *, 

3106 asGroupList: bool = False, 

3107 asMatch: bool = False, 

3108 ) -> None: 

3109 super().__init__() 

3110 asGroupList = asGroupList or as_group_list 

3111 asMatch = asMatch or as_match 

3112 

3113 if isinstance(pattern, str_type): 

3114 if not pattern: 

3115 raise ValueError("null string passed to Regex; use Empty() instead") 

3116 

3117 self._re = None 

3118 self._may_return_empty = None # type: ignore [assignment] 

3119 self.reString = self.pattern = pattern 

3120 

3121 elif hasattr(pattern, "pattern") and hasattr(pattern, "match"): 

3122 self._re = pattern 

3123 self._may_return_empty = None # type: ignore [assignment] 

3124 self.pattern = self.reString = pattern.pattern 

3125 

3126 elif callable(pattern): 

3127 # defer creating this pattern until we really need it 

3128 self.pattern = pattern 

3129 self._may_return_empty = None # type: ignore [assignment] 

3130 self._re = None 

3131 

3132 else: 

3133 raise TypeError( 

3134 "Regex may only be constructed with a string or a compiled RE object," 

3135 " or a callable that takes no arguments and returns a string or a" 

3136 " compiled RE object" 

3137 ) 

3138 

3139 self.flags = flags 

3140 self.errmsg = f"Expected {self.name}" 

3141 self.mayIndexError = False 

3142 self.asGroupList = asGroupList 

3143 self.asMatch = asMatch 

3144 if self.asGroupList: 

3145 self.parseImpl = self.parseImplAsGroupList # type: ignore [method-assign] 

3146 if self.asMatch: 

3147 self.parseImpl = self.parseImplAsMatch # type: ignore [method-assign] 

3148 

3149 @cached_property 

3150 def re(self) -> re.Pattern: 

3151 if self._re: 

3152 return self._re 

3153 

3154 if callable(self.pattern): 

3155 # replace self.pattern with the string returned by calling self.pattern() 

3156 self.pattern = cast(Callable[[], str], self.pattern)() 

3157 

3158 # see if we got a compiled RE back instead of a str - if so, we're done 

3159 if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"): 

3160 self._re = cast(re.Pattern[str], self.pattern) 

3161 self.pattern = self.reString = self._re.pattern 

3162 return self._re 

3163 

3164 try: 

3165 self._re = re.compile(self.pattern, self.flags) 

3166 except re.error: 

3167 raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex") 

3168 else: 

3169 self._may_return_empty = self.re.match("", pos=0) is not None 

3170 return self._re 

3171 

3172 @cached_property 

3173 def re_match(self) -> Callable[[str, int], Any]: 

3174 return self.re.match 

3175 

3176 @property 

3177 def mayReturnEmpty(self): 

3178 if self._may_return_empty is None: 

3179 # force compile of regex pattern, to set may_return_empty flag 

3180 self.re # noqa 

3181 return self._may_return_empty 

3182 

3183 @mayReturnEmpty.setter 

3184 def mayReturnEmpty(self, value): 

3185 self._may_return_empty = value 

3186 

3187 def _generateDefaultName(self) -> str: 

3188 unescaped = repr(self.pattern).replace("\\\\", "\\") 

3189 return f"Re:({unescaped})" 

3190 

3191 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: 

3192 # explicit check for matching past the length of the string; 

3193 # this is done because the re module will not complain about 

3194 # a match with `pos > len(instring)`, it will just return "" 

3195 if loc > len(instring) and self.mayReturnEmpty: 

3196 raise ParseException(instring, loc, self.errmsg, self) 

3197 

3198 result = self.re_match(instring, loc) 

3199 if not result: 

3200 raise ParseException(instring, loc, self.errmsg, self) 

3201 

3202 loc = result.end() 

3203 ret = ParseResults(result.group()) 

3204 d = result.groupdict() 

3205 

3206 for k, v in d.items(): 

3207 ret[k] = v 

3208 

3209 return loc, ret 

3210 

3211 def parseImplAsGroupList(self, instring, loc, do_actions=True): 

3212 if loc > len(instring) and self.mayReturnEmpty: 

3213 raise ParseException(instring, loc, self.errmsg, self) 

3214 

3215 result = self.re_match(instring, loc) 

3216 if not result: 

3217 raise ParseException(instring, loc, self.errmsg, self) 

3218 

3219 loc = result.end() 

3220 ret = result.groups() 

3221 return loc, ret 

3222 

3223 def parseImplAsMatch(self, instring, loc, do_actions=True): 

3224 if loc > len(instring) and self.mayReturnEmpty: 

3225 raise ParseException(instring, loc, self.errmsg, self) 

3226 

3227 result = self.re_match(instring, loc) 

3228 if not result: 

3229 raise ParseException(instring, loc, self.errmsg, self) 

3230 

3231 loc = result.end() 

3232 ret = result 

3233 return loc, ret 

3234 

3235 def sub(self, repl: str) -> ParserElement: 

3236 r""" 

3237 Return :class:`Regex` with an attached parse action to transform the parsed 

3238 result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_. 

3239 

3240 Example:: 

3241 

3242 make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>") 

3243 print(make_html.transform_string("h1:main title:")) 

3244 # prints "<h1>main title</h1>" 

3245 """ 

3246 if self.asGroupList: 

3247 raise TypeError("cannot use sub() with Regex(as_group_list=True)") 

3248 

3249 if self.asMatch and callable(repl): 

3250 raise TypeError( 

3251 "cannot use sub() with a callable with Regex(as_match=True)" 

3252 ) 

3253 

3254 if self.asMatch: 

3255 

3256 def pa(tokens): 

3257 return tokens[0].expand(repl) 

3258 

3259 else: 

3260 

3261 def pa(tokens): 

3262 return self.re.sub(repl, tokens[0]) 

3263 

3264 return self.add_parse_action(pa) 

3265 

3266 

3267class QuotedString(Token): 

3268 r""" 

3269 Token for matching strings that are delimited by quoting characters. 

3270 

3271 Defined with the following parameters: 

3272 

3273 - ``quote_char`` - string of one or more characters defining the 

3274 quote delimiting string 

3275 - ``esc_char`` - character to escape quotes, typically backslash 

3276 (default= ``None``) 

3277 - ``esc_quote`` - special quote sequence to escape an embedded quote 

3278 string (such as SQL's ``""`` to escape an embedded ``"``) 

3279 (default= ``None``) 

3280 - ``multiline`` - boolean indicating whether quotes can span 

3281 multiple lines (default= ``False``) 

3282 - ``unquote_results`` - boolean indicating whether the matched text 

3283 should be unquoted (default= ``True``) 

3284 - ``end_quote_char`` - string of one or more characters defining the 

3285 end of the quote delimited string (default= ``None`` => same as 

3286 quote_char) 

3287 - ``convert_whitespace_escapes`` - convert escaped whitespace 

3288 (``'\t'``, ``'\n'``, etc.) to actual whitespace 

3289 (default= ``True``) 

3290 

3291 .. caution:: ``convert_whitespace_escapes`` has no effect if 

3292 ``unquote_results`` is ``False``. 

3293 

3294 Example:: 

3295 

3296 qs = QuotedString('"') 

3297 print(qs.search_string('lsjdf "This is the quote" sldjf')) 

3298 complex_qs = QuotedString('{{', end_quote_char='}}') 

3299 print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf')) 

3300 sql_qs = QuotedString('"', esc_quote='""') 

3301 print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) 

3302 

3303 prints:: 

3304 

3305 [['This is the quote']] 

3306 [['This is the "quote"']] 

3307 [['This is the quote with "embedded" quotes']] 

3308 """ 

3309 

    # Maps each two-character escape sequence (a backslash followed by a
    # letter) to the whitespace character it denotes; used when unquoting with
    # ``convert_whitespace_escapes`` enabled.
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

3311 

3312 def __init__( 

3313 self, 

3314 quote_char: str = "", 

3315 esc_char: typing.Optional[str] = None, 

3316 esc_quote: typing.Optional[str] = None, 

3317 multiline: bool = False, 

3318 unquote_results: bool = True, 

3319 end_quote_char: typing.Optional[str] = None, 

3320 convert_whitespace_escapes: bool = True, 

3321 *, 

3322 quoteChar: str = "", 

3323 escChar: typing.Optional[str] = None, 

3324 escQuote: typing.Optional[str] = None, 

3325 unquoteResults: bool = True, 

3326 endQuoteChar: typing.Optional[str] = None, 

3327 convertWhitespaceEscapes: bool = True, 

3328 ) -> None: 

3329 super().__init__() 

3330 esc_char = escChar or esc_char 

3331 esc_quote = escQuote or esc_quote 

3332 unquote_results = unquoteResults and unquote_results 

3333 end_quote_char = endQuoteChar or end_quote_char 

3334 convert_whitespace_escapes = ( 

3335 convertWhitespaceEscapes and convert_whitespace_escapes 

3336 ) 

3337 quote_char = quoteChar or quote_char 

3338 

3339 # remove white space from quote chars 

3340 quote_char = quote_char.strip() 

3341 if not quote_char: 

3342 raise ValueError("quote_char cannot be the empty string") 

3343 

3344 if end_quote_char is None: 

3345 end_quote_char = quote_char 

3346 else: 

3347 end_quote_char = end_quote_char.strip() 

3348 if not end_quote_char: 

3349 raise ValueError("end_quote_char cannot be the empty string") 

3350 

3351 self.quote_char: str = quote_char 

3352 self.quote_char_len: int = len(quote_char) 

3353 self.first_quote_char: str = quote_char[0] 

3354 self.end_quote_char: str = end_quote_char 

3355 self.end_quote_char_len: int = len(end_quote_char) 

3356 self.esc_char: str = esc_char or "" 

3357 self.has_esc_char: bool = esc_char is not None 

3358 self.esc_quote: str = esc_quote or "" 

3359 self.unquote_results: bool = unquote_results 

3360 self.convert_whitespace_escapes: bool = convert_whitespace_escapes 

3361 self.multiline = multiline 

3362 self.re_flags = re.RegexFlag(0) 

3363 

3364 # fmt: off 

3365 # build up re pattern for the content between the quote delimiters 

3366 inner_pattern: list[str] = [] 

3367 

3368 if esc_quote: 

3369 inner_pattern.append(rf"(?:{re.escape(esc_quote)})") 

3370 

3371 if esc_char: 

3372 inner_pattern.append(rf"(?:{re.escape(esc_char)}.)") 

3373 

3374 if len(self.end_quote_char) > 1: 

3375 inner_pattern.append( 

3376 "(?:" 

3377 + "|".join( 

3378 f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))" 

3379 for i in range(len(self.end_quote_char) - 1, 0, -1) 

3380 ) 

3381 + ")" 

3382 ) 

3383 

3384 if self.multiline: 

3385 self.re_flags |= re.MULTILINE | re.DOTALL 

3386 inner_pattern.append( 

3387 rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}" 

3388 rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])" 

3389 ) 

3390 else: 

3391 inner_pattern.append( 

3392 rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r" 

3393 rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])" 

3394 ) 

3395 

3396 self.pattern = "".join( 

3397 [ 

3398 re.escape(self.quote_char), 

3399 "(?:", 

3400 '|'.join(inner_pattern), 

3401 ")*", 

3402 re.escape(self.end_quote_char), 

3403 ] 

3404 ) 

3405 

3406 if self.unquote_results: 

3407 if self.convert_whitespace_escapes: 

3408 self.unquote_scan_re = re.compile( 

3409 rf"({'|'.join(re.escape(k) for k in self.ws_map)})" 

3410 rf"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})" 

3411 rf"|({re.escape(self.esc_char)}.)" 

3412 rf"|(\n|.)", 

3413 flags=self.re_flags, 

3414 ) 

3415 else: 

3416 self.unquote_scan_re = re.compile( 

3417 rf"({re.escape(self.esc_char)}.)" 

3418 rf"|(\n|.)", 

3419 flags=self.re_flags 

3420 ) 

3421 # fmt: on 

3422 

3423 try: 

3424 self.re = re.compile(self.pattern, self.re_flags) 

3425 self.reString = self.pattern 

3426 self.re_match = self.re.match 

3427 except re.error: 

3428 raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex") 

3429 

3430 self.errmsg = f"Expected {self.name}" 

3431 self.mayIndexError = False 

3432 self._may_return_empty = True 

3433 

3434 def _generateDefaultName(self) -> str: 

3435 if self.quote_char == self.end_quote_char and isinstance( 

3436 self.quote_char, str_type 

3437 ): 

3438 return f"string enclosed in {self.quote_char!r}" 

3439 

3440 return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}" 

3441 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Match a quoted string starting at ``loc``.

    Returns a ``(end_location, text)`` tuple. Raises ``ParseException`` when
    the character at ``loc`` is not the first quote character or the regex
    does not match there. When ``unquote_results`` is set, the delimiters are
    stripped and escape sequences in the body are converted.
    """
    # Compare the first character of the opening quote before running the
    # more complicated regex match.
    if instring[loc] == self.first_quote_char:
        found = self.re_match(instring, loc)
    else:
        found = None
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    # ending location and matched text both come from the regex match
    loc = found.end()
    ret = found.group()

    if not self.unquote_results:
        return loc, ret

    # strip off the opening and closing quotes
    ret = ret[self.quote_char_len : -self.end_quote_char_len]
    if not isinstance(ret, str_type):
        return loc, ret

    def convert_escaped_numerics(s: str) -> str:
        # null, 3-digit octal, and hex/Unicode ("x.."/"u....") escapes
        if s == "0":
            return "\0"
        if s.isdigit() and len(s) == 3:
            return chr(int(s, base=8))
        if s.startswith(("u", "x")):
            return chr(int(s[1:], base=16))
        return s

    def unescape_with_whitespace(match) -> str:
        # only one group of unquote_scan_re matches at any given time;
        # groups are consulted lazily, in order
        if match.group(1):
            # group 1 matches \t, \n, etc.
            return self.ws_map[match.group(1)]
        if match.group(2):
            # group 2 matches escaped octal, null, hex, and Unicode sequences
            return convert_escaped_numerics(match.group(2)[1:])
        if match.group(3):
            # group 3 matches escaped characters
            return match.group(3)[-1]
        # group 4 matches any character
        return match.group(4)

    def unescape_plain(match) -> str:
        if match.group(1):
            # group 1 matches escaped characters
            return match.group(1)[-1]
        # group 2 matches any character
        return match.group(2)

    if self.convert_whitespace_escapes:
        convert = unescape_with_whitespace
    else:
        convert = unescape_plain
    ret = "".join(convert(piece) for piece in self.unquote_scan_re.finditer(ret))

    # replace escaped quotes
    if self.esc_quote:
        ret = ret.replace(self.esc_quote, self.end_quote_char)

    return loc, ret

3504 

3505 

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.

    Whitespace is included in the match unless it is listed among the
    excluded characters (see example). Construct with a string of all
    disallowed characters plus optional ``min``, ``max`` and/or ``exact``
    lengths. ``min`` defaults to 1 (values < 1 are rejected); ``max`` and
    ``exact`` default to 0, meaning no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        super().__init__()
        self.skipWhitespace = False
        # accept either the PEP 8 name or the legacy keyword
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            shown = self.notChars
        else:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # never scan past maxLen characters or the end of the input
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3584 

3585 

class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.

    Characters passed in ``ws`` that have no symbolic name in
    ``whiteStrs`` are shown by their ``repr()`` in the default name.
    """

    # symbolic names used when building the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws
        # skip only those known whitespace characters that this expression
        # is NOT supposed to match
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        # Fall back to repr() for characters without a symbolic name, so a
        # character such as "\v" does not raise KeyError while the
        # constructor is formatting errmsg.
        return "".join(White.whiteStrs.get(c, repr(c)) for c in self.matchWhite)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        # never scan past maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3663 

3664 

class PositionToken(Token):
    """Token subclass that presets the ``_may_return_empty`` and
    ``mayIndexError`` flags for the position-testing classes derived from it.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

3670 

3671 

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; useful
    when scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        # already at the requested column: nothing to skip
        if col(loc, instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        # step over whitespace until the column is reached, a non-space
        # character is hit, or the input ends
        while loc < end:
            if not instring[loc].isspace():
                break
            if col(loc, instring) == self.col:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        # the text between here and the target column becomes the token
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]

3704 

3705 

class LineStart(PositionToken):
    r"""Matches when the current position is at the start of a line
    within the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before newline is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newline
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" in self.orig_whiteChars:
            # step over each newline, then skip whitespace again
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3753 

3754 

class LineEnd(PositionToken):
    """Matches when the current position is at the end of a line within
    the parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        # newline must not be skipped as whitespace, since it is what this
        # expression matches
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)
        if loc == end:
            # end of input counts as end of line; no token text is returned
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3776 

3777 

class StringStart(PositionToken):
    """Matches when the current position is at the start of the parse
    string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeed at position 0, or when everything before loc is just
        # whitespace and ignorables (i.e. pre-parsing from 0 lands on loc)
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

3793 

3794 

class StringEnd(PositionToken):
    """
    Matches when the current position is at the end of the parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        # exactly at the end: step one past it; already past it: stay put
        step = 1 if loc == end else 0
        return loc + step, []

3813 

3814 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    # NOTE: the docstring above is a raw string so that ``\b`` is kept as a
    # backslash followed by "b" instead of becoming a backspace character.

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # the legacy keyword wins only when it was changed from its default
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # position 0 always matches; elsewhere the previous character must
        # be a non-word character and the current one a word character
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3841 

3842 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    # NOTE: the docstring above is a raw string so that ``\b`` is kept as a
    # backslash followed by "b" instead of becoming a backspace character.

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # the legacy keyword wins only when it was changed from its default
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        instrlen = len(instring)
        # at or past the end of the input always matches; elsewhere the
        # current character must be a non-word character and the previous
        # one a word character
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # at loc 0 there is no previous character; without this
                # guard instring[loc - 1] would wrap around to the last
                # character of the input
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3870 

3871 

class Tag(Token):
    """
    A meta-element that inserts a named result into the parsed tokens,
    so that it can be checked later in a parse action or while
    processing the parsed results. Takes an optional tag value, which
    defaults to ``True``.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self._may_return_empty = True
        self.mayIndexError = False
        self.show_in_diagram = False
        self.leave_whitespace()
        # the tag itself is attached by a parse action
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        key = self.tag_name
        tokens[key] = self.tag_value

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

3915 

3916 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.

    Holds the contained expressions in ``self.exprs`` and propagates
    whitespace, ignore and streamline operations to them.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the list stored as ``self.exprs``.

        Accepts a generator, a single string, a single ParserElement, or
        any other iterable; strings are wrapped with
        ``self._literalStringClass``.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            exprs = list(exprs)
            # if any strings were provided, wrap them with the literal class
            if any(isinstance(expr, str_type) for expr in exprs):
                exprs = (
                    self._literalStringClass(e) if isinstance(e, str_type) else e
                    for e in exprs
                )
            self.exprs = list(exprs)
        else:
            # last resort: try to listify, else treat as a single element
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Append ``other`` to the contained expressions and return self."""
        self.exprs.append(other)
        # reset the cached default name, since the contents changed
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # operate on copies of the contained expressions rather than on
            # the originals
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # operate on copies of the contained expressions rather than on
            # the originals
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained expression.

        A ``Suppress`` instance that is already in ``self.ignoreExprs`` is
        not registered again.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super().ignore(other)
                # propagate the entry most recently added to ignoreExprs
                for e in self.exprs:
                    e.ignore(self.ignoreExprs[-1])
        else:
            super().ignore(other)
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline self and all contained expressions, flattening nested
        expressions of the same class where that is safe. Returns self.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                # splice the first element's contents in ahead of the second
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                # splice the last element's contents in after the others
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, then validates each
        contained expression and runs the recursion check on self.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        # extend a copy of the trace so the caller's list is not mutated
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of self whose contained expressions are also copies."""
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # Skip the check entirely unless the
        # warn_ungrouped_named_tokens_in_collection diagnostic is enabled
        # and not suppressed on this expression.
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, list_all_matches)

        # warn once, for the first contained expression that already
        # carries a results name
        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4088 

4089 

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element; once parseImpl encounters it, parse failures in
        # the remaining expressions are raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence from ``exprs_arg``, wrapping strings as
        literals and replacing each ``...`` with a ``SkipTo`` for the
        expression that follows it.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            tmp: list[ParserElement] = []
            # pair each element with its successor; the last element is
            # never first in a pair, so it is left untouched below
            for cur_expr, next_expr in zip(exprs, exprs[1:]):
                if cur_expr is Ellipsis:
                    tmp.append(SkipTo(next_expr)("_skipped*"))
                else:
                    tmp.append(cur_expr)

            exprs[:-1] = tmp

        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            # take whitespace handling from the first expression, unless it
            # is a White, in which case whitespace is not skipped
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self._may_return_empty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Collapse pending skips, run the base streamline, link any
        ``IndentedBlock`` to the expression before it, and recompute
        ``_may_return_empty``. Returns self.
        """
        # collapse any _PendingSkip's
        if self.exprs and any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in self.exprs[:-1]
        ):
            deleted_expr_marker = NoMatch()
            # NOTE(review): this iterates over a slice copy, so entries
            # replaced by the marker in self.exprs below still appear here
            # as the original elements - confirm whether the marker check
            # on the next line can ever be true.
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    # merge the following expression into the pending skip
                    # and mark its slot for removal
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # when prev matches, record its column on the block as
                    # parent_anchor; cur is bound through a default
                    # argument so each lambda keeps its own block
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                # descend into the first sub-expression, if there is one
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating tokens.

        Returns ``(loc, tokens)``. After an ``_ErrorStop`` has been passed,
        failures are raised as ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # if isinstance(e, And._ErrorStop):
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    # convert the failure to a ParseSyntaxException
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    # report an IndexError as a failure at end of input
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Implement ``+=`` by appending ``other`` (strings become literals)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop after the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the slice picks the first and last characters of inner)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"

4256 

4257 

class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            # may match empty if any alternative can; skips whitespace only
            # if every alternative does
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self._may_return_empty = True

    def streamline(self) -> ParserElement:
        """Run the base streamline, then recompute the flags derived from
        the alternatives. Returns self.
        """
        super().streamline()
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            # unlike __init__, a White alternative also turns skipping off
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        Raises the collected fatal exception with the greatest location if
        any alternative raised one and no match was returned; otherwise
        raises the ``ParseException`` with the greatest location, or a
        generic one if there was nothing to try.
        """
        # furthest-failure bookkeeping for the error path
        maxExcLoc = -1
        maxException = None
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # discard non-fatal errors recorded so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # non-fatal errors are only tracked while no fatal is pending
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                # record an IndexError as a failure at end of input
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # (end location, tokens) of the best shorter-than-expected match
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    # matched at least as far as the trial parse did
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; when the top two are tied, re-sort
                # with the longer parser element string first as tie-breaker
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if maxExcLoc == parse_start_loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Implement ``^=`` by appending ``other`` (strings become literals)."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # When the warn_multiple_tokens_in_named_alternation diagnostic is
        # enabled and not suppressed, warn if any alternative is an And
        # that has not suppressed the warning itself.
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4414 

4415 

class MatchFirst(ParseExpression):
    """Ordered alternation of :class:`ParserElement` expressions.

    The alternatives are tried in the order they were listed and the
    result of the first one that matches is returned; at least one of
    them must match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            # no alternatives at all: flagged as able to return empty
            self._may_return_empty = True
            return

        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions (once) and refresh the
        flags that are derived from them."""
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        # skip leading whitespace only if every alternative does so and
        # none of the alternatives is a White expression
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses at ``loc``.

        If every alternative fails, re-raise the failure that got furthest
        into the input. A :class:`ParseFatalException` from any alternative
        is propagated immediately.
        """
        furthest_loc = -1
        furthest_exc = None

        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alternative
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc, furthest_loc = err, err.loc
            except IndexError:
                end_of_input = len(instring)
                if end_of_input > furthest_loc:
                    furthest_exc = ParseException(
                        instring, end_of_input, alternative.errmsg, self
                    )
                    furthest_loc = end_of_input

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # if the furthest failure is at the (whitespace-skipped) start
        # position, infer that all alternatives failed right there, and
        # report this expression's collective message instead of the
        # message of any single alternative
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Implement ``self |= other`` by appending ``other`` as a further
        alternative; strings are converted with the literal string class."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        alternatives = " | ".join(str(e) for e in self.exprs)
        return f"{{{alternatives}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if any alternative is an ``And``."""
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        warning_enabled = (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
        )
        if warning_enabled and any(
            isinstance(e, And) and diag not in e.suppress_warnings_
            for e in self.exprs
        ):
            # warn from this frame so that stacklevel=3 keeps pointing at
            # the same caller
            warnings.warn(
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group",
                stacklevel=3,
            )

        return super()._setResultsName(name, list_all_matches)

4524 

4525 

class Each(ParseExpression):
    """Requires every given :class:`ParserElement` to be found, while
    accepting them in any order. Expressions may be separated by
    whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        # with no expressions at all the flag is True as well
        self._may_return_empty = (not self.exprs) or all(
            e.mayReturnEmpty for e in self.exprs
        )
        self.skipWhitespace = True
        # the required/optional groupings are built on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``self &= other`` by appending ``other`` to the
        expressions; strings are converted with the literal string class."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        super().streamline()
        self._may_return_empty = (not self.exprs) or all(
            e.mayReturnEmpty for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expressions in whatever order they occur.

        A first pass uses ``try_parse`` to discover the order in which the
        expressions match; a second pass parses them for real, in that
        order, and accumulates the results.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            unwrapped_opts = [e.expr for e in self.exprs if isinstance(e, Opt)]
            empty_capable = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = unwrapped_opts + empty_capable
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        matchOrder: list[ParserElement] = []
        fatals: list[ParseFatalException] = []

        # keep sweeping over the remaining candidates until a complete sweep
        # matches nothing
        while True:
            candidates = pending_required + pending_optional + repeatables
            num_failed = 0
            fatals.clear()
            for candidate in candidates:
                try:
                    scan_loc = candidate.try_parse(
                        instring, scan_loc, raise_fatal=True
                    )
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = candidate
                    fatals.append(pfe)
                    num_failed += 1
                except ParseException:
                    num_failed += 1
                else:
                    # record the Opt wrapper, if this was an unwrapped Opt
                    matchOrder.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            if num_failed == len(candidates):
                break

        # look for any ParseFatalExceptions raised during the last sweep
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer element description
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(map(str, pending_required))
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        total_results = ParseResults([])
        for matched in matchOrder:
            loc, results = matched._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        members = " & ".join(str(e) for e in self.exprs)
        return f"{{{members}}}"

4700 

4701 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement` that wraps a single
    contained expression, for combining and post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            literal_text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(literal_text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(literal_text)
            else:
                expr = literal_cls(Literal(literal_text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # take over the parsing-related settings of the contained expression
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the contained expression as a list (empty if undefined)."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the contained expression, filling in any
        missing details on exceptions that pass through."""
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            # an explicitly named (non-Forward) element reports its own message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy of the contained expression
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy of the contained expression
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            if self.expr is not None:
                # pass along the entry that was just added
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        if self.expr is not None:
            self.expr._checkRecursion(parseElementList[:] + [self])

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        if self.expr is not None:
            self.expr.validate(trace[:] + [self])
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4810 

4811 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).

    Keyword parameters:

    - ``recursive`` - (default= ``False``) also accept more deeply indented
      nested blocks after each matched expression
    - ``grouped`` - (default= ``True``) wrap the matched block in a
      :class:`Group`
    """

    class _Indent(Empty):
        """Empty match that succeeds only at column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that succeeds only at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; a nested block gets its parent's
        # indent column assigned here by parseImpl
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # if self.expr does not match at this location, this raises and there
        # is nothing more to do
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)

        # one entry of the block: the expression, positioned at the block's column
        inner_expr = Empty() + self._Indent(indent_col) + self.expr
        if self._recursive:
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(self._IndentGreater(indent_col) + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        wrapper = Group if self._grouped else (lambda expr: expr)  # type: ignore[misc, assignment]
        block_expr = wrapper(block) + Optional(trailing_undent)
        return block_expr.parseImpl(instring, anchor_loc, do_actions)

4874 

4875 

class AtStringStart(ParseElementEnhance):
    """Matches the given expression only when the parse position is the
    very beginning of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

4895 

4896 

class AtLineStart(ParseElementEnhance):
    r"""Matches the given expression only when the parse position is at
    column 1, i.e. the beginning of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

4928 

4929 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` verifies that the specified parse expression matches
    at the current position, but does *not* advance the parsing position
    within the input string. ``FollowedBy`` always returns a null token
    list. If any results names are defined in the lookahead expression,
    those *will* be returned for access by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # parse with the contained expression, then empty the token list of
        # the returned ParseResults in place, so that any named results
        # defined in the lookahead expression are kept
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead[:]

        # the incoming location is returned unchanged: nothing is consumed
        return loc, lookahead

4964 

4965 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` verifies that the specified parse expression matches
    prior to the current position, without advancing the parsing
    position within the input string. ``PrecededBy`` always returns a
    null token list, but if a results name is defined on the given
    expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False

        # work out whether the lookbehind width can be taken from the
        # expression itself; if so it replaces the retreat argument
        known_width = None
        if isinstance(expr, str_type):
            known_width = len(typing.cast(str, expr))
        elif isinstance(expr, (Literal, Keyword)):
            known_width = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            known_width = expr.maxLen
        elif isinstance(expr, PositionToken):
            known_width = 0

        self.exact = known_width is not None
        if self.exact:
            retreat = known_width
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # empty the matched tokens in place, so a null token list is returned
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        if self.exact:
            # fixed width: parse once, exactly ``retreat`` characters back
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        window_len = len(window)
        last_expr: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        # try successively longer lookbehinds, each of which must reach the
        # end of the window (hence the trailing StringEnd)
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = test_expr._parse(window, window_len - offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                return loc, ret

        # nothing matched: report the most recent failure
        raise last_expr

5045 

5046 

class Located(ParseElementEnhance):
    """
    Decorates a returned token with the locations in the input string
    where its match starts and ends.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end, tokens = self.expr._parse(instring, begin, do_actions, callPreParse=False)

        located = ParseResults([begin, tokens, end])
        located["locn_start"] = begin
        located["value"] = tokens
        located["locn_end"] = end

        # must return as a list, so that the name will be attached to the complete group
        return end, ([located] if self.resultsName else located)

5087 

5088 

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` verifies that the specified parse expression does *not*
    match at the current position, and does *not* advance the parsing
    position within the input string. Also, ``NotAny`` does *not* skip
    over leading whitespace. ``NotAny`` always returns a null token
    list. May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        unwanted_found = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        return f"~{{{self.expr}}}"

5130 

5131 

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition of a contained expression,
    with an optional sentinel expression that stops the repetition."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        self.saveAsList = True
        # ``stopOn`` is the legacy spelling of ``stop_on``
        sentinel = stopOn or stop_on
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or, with ``None``, clear) the sentinel expression that
        terminates the repetition; returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expression once, then as many further times
        as it keeps matching, stopping early at the sentinel if one is set."""
        parse_once = self.expr._parse
        skip_ignorables = self._skipIgnorables
        sentinel_guard = (
            self.not_ender.try_parse if self.not_ender is not None else None
        )

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if sentinel_guard is not None:
            sentinel_guard(instring, loc)
        loc, tokens = parse_once(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if sentinel_guard is not None:
                    sentinel_guard(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_once(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # a failed repetition ends the loop; earlier matches are kept
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if a contained expression already
        carries a results name."""
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            # first contained element that has a results name of its own
            collision = next(
                (
                    e
                    for e in [self.expr] + self.expr.recurse()
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and diag not in e.suppress_warnings_
                ),
                None,
            )
            if collision is not None:
                # warn from this frame so that stacklevel=3 keeps pointing
                # at the same caller
                warnings.warn(
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {collision.resultsName!r} on contained expression",
                    stacklevel=3,
                )

        return super()._setResultsName(name, list_all_matches)

5207 

5208 

class OneOrMore(_MultipleMatch):
    """
    Repetition of the given expression, which must match at least once.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        repeated = self.expr
        return f"{{{repeated}}}..."

5239 

5240 

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of the given expression, which may match any
    number of times, including none.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        try:
            return super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # zero matches is still a success: nothing consumed, no tokens
            no_tokens = ParseResults([], name=self.resultsName)
            return loc, no_tokens

    def _generateDefaultName(self) -> str:
        repeated = self.expr
        return f"[{repeated}]..."

5273 

5274 

class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. With
    ``combine`` set to ``True``, the matching tokens are returned as a
    single token string, with the delimiters included; otherwise, the
    matching tokens are returned as a list of tokens, with the
    delimiters suppressed.

    ``min`` and ``max`` optionally bound the number of list elements;
    ``min`` must be at least 1, and ``max`` must not be less than ``min``.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example::

        DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are kept in the output only when combining
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one leading element, then (delimiter + element) repeated within
        # bounds that are each one less than the element bounds
        fewest_extra = self.min - 1
        most_extra = None if self.max is None else self.max - 1
        list_expr = self.content + (self.delim + self.content) * (
            fewest_extra,
            most_extra,
        )
        if self.allow_trailing_delim:
            list_expr += Opt(self.delim)

        if self.combine:
            list_expr = Combine(list_expr)

        super().__init__(list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        element = self.content.streamline()
        return f"{element} [{self.raw_delim} {element}]..."

5341 

5342 

5343class _NullToken: 

5344 def __bool__(self): 

5345 return False 

5346 

5347 def __str__(self): 

5348 return "" 

5349 

5350 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        # an optional expression can always succeed without consuming input
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, falling back to the default (if any)
        when it raises ``ParseException`` or ``IndexError``."""
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            pass
        else:
            return loc, tokens

        # no match: loc is unchanged from the value passed in
        fallback = self.defaultValue
        if fallback is self.__optionalNotMatched:
            return loc, []  # type: ignore[return-value]
        if not wrapped.resultsName:
            return loc, [fallback]  # type: ignore[return-value]

        # the wrapped expression is named, so expose the default under that name
        tokens = ParseResults([fallback])
        tokens[wrapped.resultsName] = fallback
        return loc, tokens

    def _generateDefaultName(self) -> str:
        text = str(self.expr)
        # strip off redundant inner {}'s
        while len(text) > 1 and text.startswith("{") and text.endswith("}"):
            text = text[1:-1]
        return f"[{text}]"


Optional = Opt

5428 

5429 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        # the legacy camelCase keyword wins when both spellings are given
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # private helper element whose only job is to carry the ignore expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register an ignorable expression and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained, matching the other
        ``ignore`` overrides in this module.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns ``(new_loc, results)`` where ``results`` holds the skipped
        text, followed by the target's tokens when ``include`` was set.
        Raises ``ParseException`` if the end of the string is passed without
        finding the target. If the ``fail_on`` expression matches first, the
        scan stops there and the text skipped so far is returned.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target, this time honouring do_actions
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5580 

5581 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember where this Forward was created, for the warning in __del__
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and return ``self``.

        A plain string is converted to the configured literal class; any
        other non-ParserElement operand yields ``NotImplemented``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Drop the creation frame only once an expression is really being
        # attached; a rejected operand leaves this Forward empty, and __del__
        # still needs the frame to report where it was defined.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # recorded so __or__ can detect "fwd << a | b" written on one line
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        try:
            never_assigned = self.expr is None
            caller_frame = self.caller_frame
        except AttributeError:
            # either construction did not complete, or an expression was
            # attached (which removes caller_frame) - nothing to report
            return

        if (
            never_assigned
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=caller_frame.filename,
                lineno=caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        if not self.streamlined:
            # set the flag first so a self-referencing grammar cannot recurse forever
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        if self.expr is not None:
            return super().copy()
        else:
            # an empty Forward is "copied" as a new Forward that refers back to it
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5848 # fmt: on 

5849 

5850 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but not passed on to the base class
        super().__init__(expr)
        self.saveAsList = False

5859 

5860 

class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # the camelCase keyword takes precedence when it is supplied
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Add an ignorable expression and return ``self``.

        When ``adjacent`` is set, the base ``ParserElement.ignore`` is called
        directly instead of the inherited override.
        """
        if not self.adjacent:
            super().ignore(other)
            return self
        ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single joined string token."""
        # start from a copy with its list content removed, then add the joined token
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

5916 

5917 

class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper."""
        if not self._asPythonList:
            return [tokenlist]

        # aslist=True: hand back the tokens wrapped in ParseResults.List
        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

5953 

5954 

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty entry of ``tokenlist`` under its first token.

        Raises ``TypeError`` if a longer entry cannot be copied, which is
        reported as the entries not being Grouped expressions.
        """
        for position, entry in enumerate(tokenlist):
            entry_size = len(entry)
            if entry_size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if entry_size == 1:
                # a key with no value maps to an empty string
                tokenlist[key] = _ParseResultsWithOffset("", position)
                continue

            if entry_size == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                tokenlist[key] = _ParseResultsWithOffset(entry[1], position)
                continue

            try:
                remainder = entry.copy()
            except Exception:
                exc = TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                )
                raise exc from None

            del remainder[0]

            # unwrap a lone value unless it is a ParseResults carrying its own names
            carries_names = isinstance(remainder, ParseResults) and remainder.haskeys()
            if len(remainder) == 1 and not carries_names:
                value = remainder[0]
            else:
                value = remainder
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        outcome = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [outcome] if self.resultsName else outcome

6040 

6041 

class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` is a placeholder skip, resolved by the next + or - operand;
        # ``savelist`` is accepted but not used here
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # pending skip: suppress everything up to ``other``, then match ``other``
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # same as __add__, but combined with the '-' operator
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # discard whatever was matched
        return []

    def suppress(self) -> ParserElement:
        # already suppressed, nothing more to wrap
        return self

6095 

6096 

# XXX: Example needs to be re-done for updated output
def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each call of the decorated parse action writes two lines to
    ``sys.stderr``: ``">>entering name(line: <source line>, <location>, <tokens>)"``
    before the call, and ``"<<leaving name (ret: <value>)"`` after it, or
    ``"<<leaving name (exception: <type>: <message>)"`` if the parse action
    raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']

    .. versionchanged:: 3.1.0
        Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        src, pos, toks = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is reported by its type name
            owner = type(paArgs[0]).__name__
            label = f"{owner}.{label}"
        sys.stderr.write(f">>entering {label}(line: {line(pos, src)!r}, {pos}, {toks!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z

6146 

6147 

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Grammar pieces used by srange() to parse a regex-style "[...]" character set.

# backslash followed by a punctuation character -> that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# "\x##" or "\0x##" hex escape -> the character with that code point.
# The digits are taken as everything after the x/X marker; str.lstrip(r"\0x")
# removes a *set* of characters, so it also ate leading zero digits ("\x00"
# became "") and left an upper-case "X" in place, both ending in ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# "\0##" octal escape -> the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one (possibly escaped) character other than a bare backslash or "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped so srange() can tell it from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6174 

6175 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If any exception is raised while parsing or expanding ``s``, an empty
    string is returned.
    """

    def _expanded(p):
        # a single character is passed through unchanged
        if not isinstance(p, ParseResults):
            yield p
            return
        # a grouped range holds its two end points, both inclusive
        for code_point in range(ord(p[0]), ord(p[1]) + 1):
            yield chr(code_point)

    try:
        pieces = []
        for part in _reBracketExpr.parse_string(s).body:
            pieces.extend(_expanded(part))
        return "".join(pieces)
    except Exception:
        return ""

6215 

6216 

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (or after its class
    when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # resolve the display name up front: the callable's own name if it has
    # one, otherwise the name of its class
    fallback_name = getattr(func, "__class__").__name__
    try:
        action_name = func.__name__
    except AttributeError:
        action_name = fallback_name

    def pa(s, l, t):
        # apply func to each token, passing the extra args after the token
        return [func(token, *args) for token in t]

    pa.__name__ = action_name
    return pa

6261 

6262 

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable in the caller's frame that is a
    :class:`ParserElement` without a custom name is given the name of
    the variable it is bound to.
    """

    # sys._getframe may be missing in the current Python; in that case
    # there is no caller frame to inspect and nothing is renamed
    frame_getter = getattr(sys, "_getframe", None)
    caller = frame_getter(1) if frame_getter is not None else None
    if caller is None:
        return

    caller = typing.cast(types.FrameType, caller)
    for var_name, candidate in caller.f_locals.items():
        # only parser elements are of interest
        if not isinstance(candidate, ParserElement):
            continue
        # leave any element alone that already has a custom name
        if candidate.customName:
            continue
        candidate.set_name(var_name)

6281 

6282 

# Double-quoted string; the Regex consumes the opening quote and the body,
# and the closing quote is matched by the trailing '"' literal. The body may
# contain:
#   - any character other than '"', newline, carriage return or backslash
#   - a doubled quote ("")
#   - a backslash followed by any non-'x' character, or '\x' followed by
#     one or more hex digits
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Single-quoted counterpart of dbl_quoted_string: same body rules with the
# roles of "'" and '"' swapped ('' is the doubled-quote form).
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either quoting style. The two alternatives repeat the regexes used above
# (they are built fresh here rather than reusing dbl_quoted_string /
# sgl_quoted_string) and each gets its own shorter name.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Python-style string literal with four alternatives joined by '^':
#   - triple-double-quoted and triple-single-quoted forms (Regex built with
#     re.MULTILINE); the body allows a lone quote or a pair of quotes as long
#     as they do not complete the closing triple, plus backslash escapes
#   - one-line double- and single-quoted forms; unlike quoted_string above,
#     an embedded quote is written as a backslash escape (\" or \') rather
#     than by doubling it
# NOTE(review): '^' presumably selects the longest matching alternative,
# so that '"""' is not taken as an empty '""' string - confirm against
# ParserElement.__xor__, which is outside this chunk.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A quoted string with a leading "u" prefix; built on a copy of quoted_string
# so that quoted_string itself is not the object embedded in this expression.
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")

6318 

6319 

# Character-set strings for code points 0xa1-0xff, written with the
# backwards-compatible ``\0x##`` hex escape form that srange() accepts.
# alphas8bit covers 0xc0-0xff except 0xd7 and 0xf7; those two code points
# are placed in punc8bit together with 0xa1-0xbf.
# NOTE: srange() returns "" when its argument fails to parse, so a typo in
# either spec would silently produce an empty string here.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# This is a snapshot of the module namespace at this point: only names bound
# to ParserElement instances before this statement are collected.
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6328 

# Compatibility synonyms
# camelCase names kept alongside the snake_case names. The expression
# synonyms are plain aliases bound to the same objects; the function
# synonyms are produced by replaced_by_pep8() (imported from .util), which
# receives the old name and the snake_case function.
# NOTE(review): replaced_by_pep8 presumably returns a wrapper that forwards
# to the new function - confirm in util.py.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on