Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 45%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2729 statements  

1# 

2# core.py 

3# 

4from __future__ import annotations 

5 

6import collections.abc 

7from collections import deque 

8import os 

9import typing 

10from typing import ( 

11 Any, 

12 Callable, 

13 Generator, 

14 NamedTuple, 

15 Sequence, 

16 TextIO, 

17 Union, 

18 cast, 

19) 

20from abc import ABC, abstractmethod 

21from enum import Enum 

22import string 

23import copy 

24import warnings 

25import re 

26import sys 

27from collections.abc import Iterable 

28import traceback 

29import types 

30from operator import itemgetter 

31from functools import wraps 

32from threading import RLock 

33from pathlib import Path 

34 

35from .util import ( 

36 _FifoCache, 

37 _UnboundedCache, 

38 __config_flags, 

39 _collapse_string_to_ranges, 

40 _escape_regex_range_chars, 

41 _flatten, 

42 LRUMemo as _LRUMemo, 

43 UnboundedMemo as _UnboundedMemo, 

44 deprecate_argument, 

45 replaced_by_pep8, 

46) 

47from .exceptions import * 

48from .actions import * 

49from .results import ParseResults, _ParseResultsWithOffset 

50from .unicode import pyparsing_unicode 

51 

52_MAX_INT = sys.maxsize 

53str_type: tuple[type, ...] = (str, bytes) 

54 

55# 

56# Copyright (c) 2003-2022 Paul T. McGuire 

57# 

58# Permission is hereby granted, free of charge, to any person obtaining 

59# a copy of this software and associated documentation files (the 

60# "Software"), to deal in the Software without restriction, including 

61# without limitation the rights to use, copy, modify, merge, publish, 

62# distribute, sublicense, and/or sell copies of the Software, and to 

63# permit persons to whom the Software is furnished to do so, subject to 

64# the following conditions: 

65# 

66# The above copyright notice and this permission notice shall be 

67# included in all copies or substantial portions of the Software. 

68# 

69# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

70# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

71# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

72# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

73# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

74# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

75# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

76# 

77 

78from functools import cached_property 

79 

80 

class __compat__(__config_flags):
    """
    Compatibility switches for pyparsing behavior planned for a later release.

    Setting one of these values to ``True`` turns the corresponding future
    behavior on in the current version, so that code can be developed and
    tested against it ahead of time.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public (non-underscore) name defined in the class body up to here
    _all_names = [_attr for _attr in locals() if not _attr.startswith("_")]
    # NOTE(review): presumably the base class refuses to change these flags -
    # confirm against util.__config_flags
    _fixed_names = ["collect_all_And_tokens"]

102 

103 

class __diag__(__config_flags):
    """Global diagnostic switches; every flag below starts out as ``False``."""

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # public flag names, then the two families selected by name prefix
    _all_names = [_attr for _attr in locals() if not _attr.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag whose name starts with ``warn``."""
        for warning_flag in cls._warning_names:
            cls.enable(warning_flag)

124 

125 

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :func:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - warning flag that was missing from this
      list; judging by its name it presumably concerns mixing the ``'<<'`` operator with a
      :class:`MatchFirst` (``'|'``) expression - TODO confirm where the warning is emitted
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :meth:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.

    Only the member *name* is used by :func:`enable_diag` / :func:`disable_diag`;
    the integer values simply number the members in declaration order.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

158 

159 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member naming the flag; its
        ``name`` is forwarded to ``__diag__.enable``
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

165 

166 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member naming the flag; its
        ``name`` is forwarded to ``__diag__.disable``
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

172 

173 

def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning (see :class:`Diagnostics`).

    Module-level convenience wrapper around ``__diag__.enable_all_warnings()``.
    """
    return __diag__.enable_all_warnings()

179 

180 

# hide abstract class: both flag classes have been created by this point, so
# the imported base-class name is removed from this module's namespace
del __config_flags

183 

184 

def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether all pyparsing warnings should be switched on.

    :param cmd_line_warn_options: warning filter specs in the
        ``action:message:category:module:line`` form (the caller passes
        ``sys.warnoptions``); missing trailing fields are treated as empty
    :param warn_env_var: value of the enabling environment variable; any
        non-empty value makes the starting answer ``True``
    :return: the answer after applying the options in order - later options
        override earlier ones and the environment variable

    An option whose action starts with ``i`` (ignore) turns warnings off when its
    module field is ``pyparsing`` or empty.  Any other action turns them on when
    either no message/category/module filter is given or the module is
    ``pyparsing``.  All remaining options leave the answer untouched.
    """
    enabled = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with colons so that short specs still unpack into five fields
        fields = (option + "::::").split(":")[:5]
        action, message, category, module, _line = fields
        if action.lower().startswith("i"):
            if module in ("pyparsing", ""):
                enabled = False
        elif module == "pyparsing" or not (message or category or module):
            enabled = True
    return enabled

200 

201 

# Import-time side effect: enable every pyparsing warning when the
# interpreter's -W options (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS
# environment variable ask for it.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

206 

207 

# build list of single arg builtins, that can be used as parse actions
# (_trim_arity wraps these directly as ``lambda s, l, t: func(t)``)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# return type of parseImpl: (new location, tokens)
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition may accept 0 to 3 of the trailing (s, loc, toks) arguments
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# callback signatures: fail action (s, loc, expr, err) and the three debug
# actions, whose trailing bool is the cache_hit flag
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

231 

232 

# Character-set constants used when building Word-style expressions.
# ASCII letters, upper case first
alphas: str = string.ascii_uppercase + string.ascii_lowercase
# identifier start / body characters of the Latin-1 range
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
# decimal digits
nums: str = string.digits
# hexadecimal digits: decimal digits, then upper case, then lower case
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every printable ASCII character that is not whitespace
printables: str = "".join(
    ch for ch in string.printable if ch not in string.whitespace
)

240 

241 

class _ParseActionIndexError(Exception):
    """
    Internal carrier for an ``IndexError`` raised by a user parse action.

    Wrapping keeps such errors from being mistaken for the ``IndexError`` that
    ParserElement parseImpl methods may raise themselves.

    ``msg`` is a short description; ``exc`` is the exception that was caught.
    """

    def __init__(self, msg: str, exc: BaseException) -> None:
        self.msg, self.exc = msg, exc

252 

253 

# Frame summary of the extract_stack() line inside _trim_arity; captured on the
# first call to _trim_arity and reused afterwards.
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
# (filename, line number) of the ``ret = func(...)`` call inside wrapper,
# synthesized from the value above plus LINE_DIFF on the first call.
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is called with the full argument list and works out,
    on its first successful call, how many leading arguments ``func`` does not
    accept.  It starts by passing every argument and, each time the call itself
    raises ``TypeError``, drops one more leading argument (at most
    ``max_limit``).  Once a call succeeds the number of dropped arguments is
    remembered and used directly from then on.

    A ``TypeError`` counts as an arity mismatch only when the innermost of the
    first two traceback frames is the ``func(...)`` call line inside wrapper;
    a ``TypeError`` raised from inside ``func`` is re-raised unchanged.
    ``IndexError`` raised while probing is re-raised as
    ``_ParseActionIndexError``.

    Callables listed in ``_single_arg_builtins`` bypass the probing and are
    always called with the tokens only.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments to drop, and whether that number is final
    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    # compare (filename, lineno) of that frame with the
                    # synthesized location of the func(...) call above
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        # arity mismatch: retry with one fewer leading argument
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

320 

321 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a boolean predicate so that it can be used as a parse action.

    Intended for places that require a parse action and where
    :meth:`ParserElement.add_condition` cannot be used (such as when adding a
    condition to an operator level in :class:`infix_notation`).

    The returned action calls ``fn`` (trimmed to the arguments it accepts) and
    raises when the result is falsy; otherwise it returns ``None``.

    Optional keyword arguments:

    :param message: define a custom message to be used in the raised exception;
        defaults to ``"failed user-defined condition"``
    :param fatal: if ``True``, will raise :class:`ParseFatalException`
        to stop parsing immediately;
        otherwise will raise :class:`ParseException`

    """
    failure_message = "failed user-defined condition" if message is None else message
    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException
    predicate = _trim_arity(fn)

    @wraps(predicate)
    def pa(s, l, t):
        if bool(predicate(s, l, t)):
            return None
        raise failure_type(s, l, failure_message)

    return pa

349 

350 

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default debug action run when a match attempt starts.

    Prints three lines: the expression with the location (plus line and column
    numbers), the text of the current line, and a ``^`` marker right-aligned to
    the current column.  The first line is prefixed with ``*`` on a cache hit.
    """
    prefix = "*" if cache_hit else ""
    headline = f"{prefix}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})"
    source_line = f" {line(loc, instring)}"
    caret_line = f" {'^':>{col(loc, instring)}}"
    print("\n".join((headline, source_line, caret_line)))

362 

363 

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default debug action run after a successful match.

    Prints the expression and ``toks.as_list()``; the line is prefixed with
    ``*`` on a cache hit.  ``instring``, ``startloc`` and ``endloc`` are
    accepted for signature compatibility and not used.
    """
    prefix = "*" if cache_hit else ""
    matched_tokens = toks.as_list()
    print(f"{prefix}Matched {expr} -> {matched_tokens}")

374 

375 

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default debug action run when a match attempt raises.

    Prints the expression, the exception's class name and its message; the
    line is prefixed with ``*`` on a cache hit.  ``instring`` and ``loc`` are
    accepted for signature compatibility and not used.
    """
    prefix = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print(f"{prefix}Match {expr} failed, {exc_name} raised: {exc}")

385 

386 

def null_debug_action(*args):
    """No-op debug action: accepts any positional arguments and does nothing,
    so that debugging output is suppressed during parsing."""
    return None

389 

390 

391class ParserElement(ABC): 

392 """Abstract base level parser element class.""" 

393 

394 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

395 verbose_stacktrace: bool = False 

396 _literalStringClass: type = None # type: ignore[assignment] 

397 

398 @staticmethod 

399 def set_default_whitespace_chars(chars: str) -> None: 

400 r""" 

401 Overrides the default whitespace chars 

402 

403 Example: 

404 

405 .. doctest:: 

406 

407 # default whitespace chars are space, <TAB> and newline 

408 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl") 

409 ParseResults(['abc', 'def', 'ghi', 'jkl'], {}) 

410 

411 # change to just treat newline as significant 

412 >>> ParserElement.set_default_whitespace_chars(" \t") 

413 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl") 

414 ParseResults(['abc', 'def'], {}) 

415 

416 # Reset to default 

417 >>> ParserElement.set_default_whitespace_chars(" \n\t\r") 

418 """ 

419 ParserElement.DEFAULT_WHITE_CHARS = chars 

420 

421 # update whitespace all parse expressions defined in this module 

422 for expr in _builtin_exprs: 

423 if expr.copyDefaultWhiteChars: 

424 expr.whiteChars = set(chars) 

425 

426 @staticmethod 

427 def inline_literals_using(cls: type) -> None: 

428 """ 

429 Set class to be used for inclusion of string literals into a parser. 

430 

431 Example: 

432 

433 .. doctest:: 

434 :options: +NORMALIZE_WHITESPACE 

435 

436 # default literal class used is Literal 

437 >>> integer = Word(nums) 

438 >>> date_str = ( 

439 ... integer("year") + '/' 

440 ... + integer("month") + '/' 

441 ... + integer("day") 

442 ... ) 

443 

444 >>> date_str.parse_string("1999/12/31") 

445 ParseResults(['1999', '/', '12', '/', '31'], 

446 {'year': '1999', 'month': '12', 'day': '31'}) 

447 

448 # change to Suppress 

449 >>> ParserElement.inline_literals_using(Suppress) 

450 >>> date_str = ( 

451 ... integer("year") + '/' 

452 ... + integer("month") + '/' 

453 ... + integer("day") 

454 ... ) 

455 

456 >>> date_str.parse_string("1999/12/31") 

457 ParseResults(['1999', '12', '31'], 

458 {'year': '1999', 'month': '12', 'day': '31'}) 

459 

460 # Reset 

461 >>> ParserElement.inline_literals_using(Literal) 

462 """ 

463 ParserElement._literalStringClass = cls 

464 

465 @classmethod 

466 def using_each(cls, seq, **class_kwargs): 

467 """ 

468 Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq. 

469 

470 Example: 

471 

472 .. testcode:: 

473 

474 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") 

475 

476 .. versionadded:: 3.1.0 

477 """ 

478 yield from (cls(obj, **class_kwargs) for obj in seq) 

479 

    # Triple of optional debug callbacks; ParserElement.__init__ stores an
    # instance with all three set to None.
    class DebugActions(NamedTuple):
        # called before a match attempt
        debug_try: typing.Optional[DebugStartAction]
        # called after a successful match
        debug_match: typing.Optional[DebugSuccessAction]
        # called when a match attempt raises
        debug_fail: typing.Optional[DebugExceptionAction]

484 

485 def __init__(self, savelist: bool = False) -> None: 

486 self.parseAction: list[ParseAction] = list() 

487 self.failAction: typing.Optional[ParseFailAction] = None 

488 self.customName: str = None # type: ignore[assignment] 

489 self._defaultName: typing.Optional[str] = None 

490 self.resultsName: str = None # type: ignore[assignment] 

491 self.saveAsList: bool = savelist 

492 self.skipWhitespace: bool = True 

493 self.whiteChars: set[str] = set(ParserElement.DEFAULT_WHITE_CHARS) 

494 self.copyDefaultWhiteChars: bool = True 

495 # used when checking for left-recursion 

496 self._may_return_empty: bool = False 

497 self.keepTabs: bool = False 

498 self.ignoreExprs: list[ParserElement] = list() 

499 self.debug: bool = False 

500 self.streamlined: bool = False 

501 # optimize exception handling for subclasses that don't advance parse index 

502 self.mayIndexError: bool = True 

503 self.errmsg: Union[str, None] = "" 

504 # mark results names as modal (report only last) or cumulative (list all) 

505 self.modalResults: bool = True 

506 # custom debug actions 

507 self.debugActions = self.DebugActions(None, None, None) 

508 # avoid redundant calls to preParse 

509 self.callPreparse: bool = True 

510 self.callDuringTry: bool = False 

511 self.suppress_warnings_: list[Diagnostics] = [] 

512 self.show_in_diagram: bool = True 

513 

514 @property 

515 def mayReturnEmpty(self) -> bool: 

516 """ 

517 .. deprecated:: 3.3.0 

518 use _may_return_empty instead. 

519 """ 

520 return self._may_return_empty 

521 

522 @mayReturnEmpty.setter 

523 def mayReturnEmpty(self, value) -> None: 

524 """ 

525 .. deprecated:: 3.3.0 

526 use _may_return_empty instead. 

527 """ 

528 self._may_return_empty = value 

529 

530 def suppress_warning(self, warning_type: Diagnostics) -> ParserElement: 

531 """ 

532 Suppress warnings emitted for a particular diagnostic on this expression. 

533 

534 Example: 

535 

536 .. doctest:: 

537 

538 >>> label = pp.Word(pp.alphas) 

539 

540 # Normally using an empty Forward in a grammar 

541 # would print a warning, but we can suppress that 

542 >>> base = pp.Forward().suppress_warning( 

543 ... pp.Diagnostics.warn_on_parse_using_empty_Forward) 

544 

545 >>> grammar = base | label 

546 >>> print(grammar.parse_string("x")) 

547 ['x'] 

548 """ 

549 self.suppress_warnings_.append(warning_type) 

550 return self 

551 

552 def visit_all(self): 

553 """General-purpose method to yield all expressions and sub-expressions 

554 in a grammar. Typically just for internal use. 

555 """ 

556 to_visit = deque([self]) 

557 seen = set() 

558 while to_visit: 

559 cur = to_visit.popleft() 

560 

561 # guard against looping forever through recursive grammars 

562 if cur in seen: 

563 continue 

564 seen.add(cur) 

565 

566 to_visit.extend(cur.recurse()) 

567 yield cur 

568 

569 def copy(self) -> ParserElement: 

570 """ 

571 Make a copy of this :class:`ParserElement`. Useful for defining 

572 different parse actions for the same parsing pattern, using copies of 

573 the original parse element. 

574 

575 Example: 

576 

577 .. testcode:: 

578 

579 integer = Word(nums).set_parse_action( 

580 lambda toks: int(toks[0])) 

581 integerK = integer.copy().add_parse_action( 

582 lambda toks: toks[0] * 1024) + Suppress("K") 

583 integerM = integer.copy().add_parse_action( 

584 lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

585 

586 print( 

587 (integerK | integerM | integer)[1, ...].parse_string( 

588 "5K 100 640K 256M") 

589 ) 

590 

591 prints: 

592 

593 .. testoutput:: 

594 

595 [5120, 100, 655360, 268435456] 

596 

597 Equivalent form of ``expr.copy()`` is just ``expr()``: 

598 

599 .. testcode:: 

600 

601 integerM = integer().add_parse_action( 

602 lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

603 """ 

604 cpy = copy.copy(self) 

605 cpy.parseAction = self.parseAction[:] 

606 cpy.ignoreExprs = self.ignoreExprs[:] 

607 if self.copyDefaultWhiteChars: 

608 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 

609 return cpy 

610 

611 def set_results_name( 

612 self, name: str, list_all_matches: bool = False, **kwargs 

613 ) -> ParserElement: 

614 """ 

615 Define name for referencing matching tokens as a nested attribute 

616 of the returned parse results. 

617 

618 Normally, results names are assigned as you would assign keys in a dict: 

619 any existing value is overwritten by later values. If it is necessary to 

620 keep all values captured for a particular results name, call ``set_results_name`` 

621 with ``list_all_matches`` = True. 

622 

623 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; 

624 this is so that the client can define a basic element, such as an 

625 integer, and reference it in multiple places with different names. 

626 

627 You can also set results names using the abbreviated syntax, 

628 ``expr("name")`` in place of ``expr.set_results_name("name")`` 

629 - see :meth:`__call__`. If ``list_all_matches`` is required, use 

630 ``expr("name*")``. 

631 

632 Example: 

633 

634 .. testcode:: 

635 

636 integer = Word(nums) 

637 date_str = (integer.set_results_name("year") + '/' 

638 + integer.set_results_name("month") + '/' 

639 + integer.set_results_name("day")) 

640 

641 # equivalent form: 

642 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

643 """ 

644 listAllMatches: bool = deprecate_argument(kwargs, "listAllMatches", False) 

645 

646 list_all_matches = listAllMatches or list_all_matches 

647 return self._setResultsName(name, list_all_matches) 

648 

649 def _setResultsName(self, name, list_all_matches=False) -> ParserElement: 

650 if name is None: 

651 return self 

652 newself = self.copy() 

653 if name.endswith("*"): 

654 name = name[:-1] 

655 list_all_matches = True 

656 newself.resultsName = name 

657 newself.modalResults = not list_all_matches 

658 return newself 

659 

660 def set_break(self, break_flag: bool = True) -> ParserElement: 

661 """ 

662 Method to invoke the Python pdb debugger when this element is 

663 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to 

664 disable. 

665 """ 

666 if break_flag: 

667 _parseMethod = self._parse 

668 

669 def breaker(instring, loc, do_actions=True, callPreParse=True): 

670 # this call to breakpoint() is intentional, not a checkin error 

671 breakpoint() 

672 return _parseMethod(instring, loc, do_actions, callPreParse) 

673 

674 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined] 

675 self._parse = breaker # type: ignore [method-assign] 

676 elif hasattr(self._parse, "_originalParseMethod"): 

677 self._parse = self._parse._originalParseMethod # type: ignore [method-assign] 

678 return self 

679 

680 def set_parse_action( 

681 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any 

682 ) -> ParserElement: 

683 """ 

684 Define one or more actions to perform when successfully matching parse element definition. 

685 

686 Parse actions can be called to perform data conversions, do extra validation, 

687 update external data structures, or enhance or replace the parsed tokens. 

688 Each parse action ``fn`` is a callable method with 0-3 arguments, called as 

689 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: 

690 

691 - ``s`` = the original string being parsed (see note below) 

692 - ``loc`` = the location of the matching substring 

693 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object 

694 

695 The parsed tokens are passed to the parse action as ParseResults. They can be 

696 modified in place using list-style append, extend, and pop operations to update 

697 the parsed list elements; and with dictionary-style item set and del operations 

698 to add, update, or remove any named results. If the tokens are modified in place, 

699 it is not necessary to return them with a return statement. 

700 

701 Parse actions can also completely replace the given tokens, with another ``ParseResults`` 

702 object, or with some entirely different object (common for parse actions that perform data 

703 conversions). A convenient way to build a new parse result is to define the values 

704 using a dict, and then create the return value using :class:`ParseResults.from_dict`. 

705 

706 If None is passed as the ``fn`` parse action, all previously added parse actions for this 

707 expression are cleared. 

708 

709 Optional keyword arguments: 

710 

711 :param call_during_try: (default= ``False``) indicate if parse action 

712 should be run during lookaheads and alternate 

713 testing. For parse actions that have side 

714 effects, it is important to only call the parse 

715 action once it is determined that it is being 

716 called as part of a successful parse. 

717 For parse actions that perform additional 

718 validation, then ``call_during_try`` should 

719 be passed as True, so that the validation code 

720 is included in the preliminary "try" parses. 

721 

722 .. Note:: 

723 The default parsing behavior is to expand tabs in the input string 

724 before starting the parsing process. 

725 See :meth:`parse_string` for more information on parsing strings 

726 containing ``<TAB>`` s, and suggested methods to maintain a 

727 consistent view of the parsed string, the parse location, and 

728 line and column positions within the parsed string. 

729 

730 Example: Parse dates in the form ``YYYY/MM/DD`` 

731 ----------------------------------------------- 

732 

733 Setup code: 

734 

735 .. testcode:: 

736 

737 def convert_to_int(toks): 

738 '''a parse action to convert toks from str to int 

739 at parse time''' 

740 return int(toks[0]) 

741 

742 def is_valid_date(instring, loc, toks): 

743 '''a parse action to verify that the date is a valid date''' 

744 from datetime import date 

745 year, month, day = toks[::2] 

746 try: 

747 date(year, month, day) 

748 except ValueError: 

749 raise ParseException(instring, loc, "invalid date given") 

750 

751 integer = Word(nums) 

752 date_str = integer + '/' + integer + '/' + integer 

753 

754 # add parse actions 

755 integer.set_parse_action(convert_to_int) 

756 date_str.set_parse_action(is_valid_date) 

757 

758 Successful parse - note that integer fields are converted to ints: 

759 

760 .. testcode:: 

761 

762 print(date_str.parse_string("1999/12/31")) 

763 

764 prints: 

765 

766 .. testoutput:: 

767 

768 [1999, '/', 12, '/', 31] 

769 

770 Failure - invalid date: 

771 

772 .. testcode:: 

773 

774 date_str.parse_string("1999/13/31") 

775 

776 prints: 

777 

778 .. testoutput:: 

779 

780 Traceback (most recent call last): 

781 ParseException: invalid date given, found '1999' ... 

782 """ 

783 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False) 

784 

785 if list(fns) == [None]: 

786 self.parseAction.clear() 

787 return self 

788 

789 if not all(callable(fn) for fn in fns): 

790 raise TypeError("parse actions must be callable") 

791 self.parseAction[:] = [_trim_arity(fn) for fn in fns] 

792 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry 

793 

794 return self 

795 

796 def add_parse_action( 

797 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any 

798 ) -> ParserElement: 

799 """ 

800 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. 

801 

802 See examples in :class:`copy`. 

803 """ 

804 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False) 

805 

806 self.parseAction += [_trim_arity(fn) for fn in fns] 

807 self.callDuringTry = self.callDuringTry or callDuringTry or call_during_try 

808 return self 

809 

810 def add_condition( 

811 self, *fns: ParseCondition, call_during_try: bool = False, **kwargs: Any 

812 ) -> ParserElement: 

813 """Add a boolean predicate function to expression's list of parse actions. See 

814 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``, 

815 functions passed to ``add_condition`` need to return boolean success/fail of the condition. 

816 

817 Optional keyword arguments: 

818 

819 - ``message`` = define a custom message to be used in the raised exception 

820 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise 

821 ParseException 

822 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls, 

823 default=False 

824 

825 Example: 

826 

827 .. doctest:: 

828 :options: +NORMALIZE_WHITESPACE 

829 

830 >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) 

831 >>> year_int = integer.copy().add_condition( 

832 ... lambda toks: toks[0] >= 2000, 

833 ... message="Only support years 2000 and later") 

834 >>> date_str = year_int + '/' + integer + '/' + integer 

835 

836 >>> result = date_str.parse_string("1999/12/31") 

837 Traceback (most recent call last): 

838 ParseException: Only support years 2000 and later... 

839 """ 

840 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False) 

841 

842 for fn in fns: 

843 self.parseAction.append( 

844 condition_as_parse_action( 

845 fn, 

846 message=str(kwargs.get("message")), 

847 fatal=bool(kwargs.get("fatal", False)), 

848 ) 

849 ) 

850 

851 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry 

852 return self 

853 

854 def set_fail_action(self, fn: ParseFailAction) -> ParserElement: 

855 """ 

856 Define action to perform if parsing fails at this expression. 

857 Fail acton fn is a callable function that takes the arguments 

858 ``fn(s, loc, expr, err)`` where: 

859 

860 - ``s`` = string being parsed 

861 - ``loc`` = location where expression match was attempted and failed 

862 - ``expr`` = the parse expression that failed 

863 - ``err`` = the exception thrown 

864 

865 The function returns no value. It may throw :class:`ParseFatalException` 

866 if it is desired to stop parsing immediately.""" 

867 self.failAction = fn 

868 return self 

869 

870 def _skipIgnorables(self, instring: str, loc: int) -> int: 

871 if not self.ignoreExprs: 

872 return loc 

873 exprsFound = True 

874 ignore_expr_fns = [e._parse for e in self.ignoreExprs] 

875 last_loc = loc 

876 while exprsFound: 

877 exprsFound = False 

878 for ignore_fn in ignore_expr_fns: 

879 try: 

880 while 1: 

881 loc, dummy = ignore_fn(instring, loc) 

882 exprsFound = True 

883 except ParseException: 

884 pass 

885 # check if all ignore exprs matched but didn't actually advance the parse location 

886 if loc == last_loc: 

887 break 

888 last_loc = loc 

889 return loc 

890 

def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should actually start.

    Skips any ignorable expressions first (when ``self.ignoreExprs`` is
    non-empty), then, if ``self.skipWhitespace`` is set, skips characters
    found in ``self.whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    limit = len(instring)
    while loc < limit and instring[loc] in skippable:
        loc += 1
    return loc

902 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """
    Base matching step: consumes nothing and produces no tokens.

    Returns ``loc`` unchanged together with an empty token list.
    """
    no_tokens = []
    return loc, no_tokens

905 

def postParse(self, instring, loc, tokenlist):
    """
    Hook applied to the tokens produced by ``parseImpl``.

    This base implementation hands ``tokenlist`` back unchanged.
    """
    return tokenlist

908 

909 # @profile 

def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Match this expression against ``instring`` at ``loc`` without memoization.

    Steps, in order: optional ``preParse``; ``parseImpl``; ``postParse``;
    wrapping of the tokens in a :class:`ParseResults`; parse actions.
    When ``self.debug`` is set or a fail action is installed, the
    debug/fail callbacks are invoked around those steps.

    :param instring: the string being parsed
    :param loc: location in ``instring`` at which to attempt the match
    :param do_actions: when false, parse actions run only if
        ``self.callDuringTry`` is set
    :param callPreParse: when true (and ``self.callPreparse`` is set),
        ``preParse`` is called before matching
    :returns: ``(loc, results)`` - the location returned by ``parseImpl``
        and the resulting :class:`ParseResults`
    :raises ParseException: when ``parseImpl`` raises ``IndexError`` on the
        guarded path, or when a parse action raises ``IndexError``
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    # Slow path: same logic as the ``else`` branch below, plus debug/fail hooks.
    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                # convert an IndexError from parseImpl into a ParseException
                # reported at the end of the input
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            # print("Exception raised:", err)
            # NOTE(review): if preParse itself raised, tokens_start has not
            # been assigned yet when it is read below - confirm whether that
            # can happen in practice
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # Fast path: no debug callbacks and no fail action.
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, aslist=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        # report an IndexError from a parse action as a
                        # ParseException, chained to the original error
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns a new value replaces the
                    # results passed on to the next action
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            aslist=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            # same loop as above, without the debug_fail reporting
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        aslist=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

1010 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the resulting location.

    A :class:`ParseFatalException` is re-raised as-is when ``raise_fatal``
    is true; otherwise it is replaced by a :class:`ParseException` built
    from this expression's own error message.
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
        return outcome[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise

1025 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``.

    Returns ``False`` when :meth:`try_parse` raises :class:`ParseException`
    or ``IndexError``, and ``True`` when it completes normally.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True

1033 

# cache for left-recursion in Forward references
recursion_lock = RLock()
# keyed by (int, Forward, bool); each value pairs an int with either the
# ParseResults or the Exception that was memoized. Emptied by reset_cache()
# and replaced with a memo object by enable_left_recursion().
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}

1039 

class _CacheType(typing.Protocol):
    """
    Class to be used for packrat and left-recursion caching of results
    and exceptions.
    """

    # value that get() hands back on a miss; _parseCache compares the
    # looked-up value against it by identity
    not_in_cache: bool

    # look up a cached entry
    def get(self, *args) -> typing.Any: ...

    # store an entry
    def set(self, *args) -> None: ...

    # drop all entries
    def clear(self) -> None: ...

1053 

class NullCache(dict):
    """
    Placeholder cache used as the initial value of the ``packrat_cache``
    class variable. Calling ``enable_packrat()`` swaps it out for a real
    ``_CacheType`` implementation.

    All three cache operations are no-ops that return ``None``.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        """Ignore the lookup and return ``None``."""
        return None

    def set(self, *args) -> None:
        """Ignore the value; nothing is stored."""
        return None

    def clear(self) -> None:
        """Do nothing (existing dict contents are left untouched)."""
        return None

1068 

# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
packrat_cache: _CacheType = NullCache()
# guards cache access in _parseCache and the enable/disable/reset methods
packrat_cache_lock = RLock()
# [hits, misses] counters, indexed by HIT=0 / MISS=1 in _parseCache
packrat_cache_stats = [0, 0]

1074 

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Memoizing wrapper around :meth:`_parseNoCache`.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``.
    On a miss the expression is parsed and either the result or a copy of
    the raised :class:`ParseBaseException` is stored; on a hit the stored
    result is returned (as a copy) or the stored exception is raised again.

    :returns: ``(loc, results)`` as produced by :meth:`_parseNoCache`
    :raises ParseBaseException: raised by the parse, or replayed from the cache
    """
    # indexes into ParserElement.packrat_cache_stats
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store (returned location, copy of results, location the
                # parse was attempted at); the copy keeps later changes to
                # the returned results out of the cache
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                # TypeError is swallowed so that debug callbacks which do not
                # accept the cache_hit keyword do not break parsing
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(tuple[int, ParseResults, int], value)
            # NOTE(review): value[0] is the location returned by the parse and
            # value[2] is the location it was attempted at (see cache.set
            # above), so ``loc_``/``endloc`` reach debug_match in the opposite
            # order from _parseNoCache's (tokens_start, loc) - confirm intent
            loc_, result, endloc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    self.debugActions.debug_match(
                        instring, loc_, endloc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return loc_, result

1124 

# parse entry point used by the rest of the class; enable_packrat() rebinds it
# to _parseCache and disable_memoization() restores _parseNoCache
_parse = _parseNoCache

1126 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters, and empty the
    left-recursion memo table.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        # zero the counters in place so existing references to the list stay valid
        stats = ParserElement.packrat_cache_stats
        stats[:] = [0] * len(stats)
        ParserElement.recursion_memos.clear()

1138 

# class attributes to keep caching status; set by enable_packrat() /
# enable_left_recursion() and cleared by disable_memoization()
_packratEnabled = False
_left_recursion_enabled = False

1142 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing, along with their memoization.

    Calling this when neither mode is active is harmless, so it can be used
    to clear any previous settings before enabling Packrat or Left Recursion.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.reset_cache()
        ParserElement._packratEnabled = False
        ParserElement._left_recursion_enabled = False
        # go back to the non-memoizing parse entry point
        ParserElement._parse = ParserElement._parseNoCache

1157 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, left-recursive :class:`Forward`
    elements are matched repeatedly with a fixed recursion depth that is
    raised step by step until the longest match is found.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    Recursion search memoizes matches of ``Forward`` elements as a matter of
    course, so parse actions may not be re-evaluated during backtracking.
    Programs whose parse actions depend on a strict ordering of side-effects
    may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing is similar to, but not the same as, Packrat
    parsing, so the two cannot be active together. Pass ``force=True`` to
    disable any previous, conflicting settings.

    Raises ``RuntimeError`` if Packrat is enabled and ``force`` is false, and
    ``NotImplementedError`` for a ``cache_size_limit`` that is not positive.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        # pick the memo implementation before touching any class state
        if cache_size_limit is None:
            memo_table = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo_table = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo_table  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True

1215 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which adds memoization to the parsing logic.
    Parse attempts repeated at the same string location (common in complex
    grammars) can return a cached value right away rather than re-running
    the parsing/validating code. Both successful results and parsing
    exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer value limits the
      size of the packrat cache; ``None`` makes the cache unbounded; ``0``
      effectively disables the cache.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is first
    imported. To activate it, your program must call the class method
    :class:`ParserElement.enable_packrat`. For best results, call
    ``enable_packrat()`` immediately after importing pyparsing.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar to, but not the same as, Bounded Recursion
    parsing, so the two cannot be active together. Pass ``force=True`` to
    disable any previous, conflicting settings.

    If packrat is already enabled (and ``force`` is false) the call returns
    without changing the existing cache.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if ParserElement._packratEnabled:
            return

        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        ParserElement._parse = ParserElement._parseCache

1267 

def parse_string(
    self, instring: str, parse_all: bool = False, **kwargs
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This function is intended as the primary interface to the
    client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This
    is also equivalent to ending the grammar with :class:`StringEnd`\\ ().

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    # accept the deprecated pre-PEP8 keyword as an alias for parse_all
    parseAll: bool = deprecate_argument(kwargs, "parseAll", False)

    parse_all = parse_all or parseAll

    # start every top-level parse with empty caches
    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse(instring, 0)
        if parse_all:
            # skip what preParse skips, then require that the input ends here
            loc = self.preParse(instring, loc)
            se = Empty() + StringEnd().set_debug(False)
            se._parse(instring, loc)
    except _ParseActionIndexError as pa_exc:
        # re-raise the exception carried in the wrapper's ``exc`` attribute
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    else:
        return tokens

1343 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    **kwargs,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    If ``always_skip_whitespace`` is true (the default), leading text is skipped
    using this expression's ignore expressions and whitespace characters even if
    the expression itself does not skip whitespace. If ``debug`` is true, each
    match is also printed as a dict of its tokens, start, and end.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    # accept the deprecated pre-PEP8 keyword; the smaller limit wins
    maxMatches: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)

    max_matches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        # use a throwaway Empty() carrying this expression's ignore
        # expressions and whitespace characters to do the skipping
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < max_matches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): ``nextloc`` (pre-parsed position) and
                        # ``nextLoc`` (end of match) are distinct variables
                        # differing only in case - confirm the branch below
                        # uses the intended one
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc: step forward to
                    # guarantee progress
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1447 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Built on :class:`scan_string`: rewrites matched text using the tokens
    that a parse action returns. Define a grammar, attach a parse action that
    modifies the returned token list, and call ``transform_string()`` on the
    target string. Every match found is replaced according to the parse
    action, unmatched text is copied through, and the resulting string is
    returned.

    Note that this sets ``self.keepTabs`` to ``True``.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
        made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
        Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy through the unmatched text preceding this match
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.as_list())
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)

        # trailing text after the last match
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(str(part) for part in _flatten(non_empty))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1506 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Convenience wrapper over :class:`scan_string` that collects just the
    tokens of every match of this expression. The optional ``max_matches``
    argument stops the search after 'n' matches are found.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    # deprecated pre-PEP8 keyword; the smaller of the two limits applies
    maxMatches: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    max_matches = min(maxMatches, max_matches)

    try:
        matches = self.scan_string(
            instring,
            max_matches=max_matches,
            always_skip_whitespace=False,
            debug=debug,
        )
        found = [toks for toks, _start, _end in matches]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1565 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, treating matches of this expression as
    the separators. The optional ``maxsplit`` argument limits the number of
    splits; the optional ``include_separators`` argument (default= ``False``)
    controls whether the matched separator text is yielded as well.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # deprecated pre-PEP8 keyword, honoured alongside include_separators
    includeSeparators: bool = deprecate_argument(kwargs, "includeSeparators", False)
    emit_separators = includeSeparators or include_separators

    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator
    yield instring[cursor:]

1603 

def __add__(self, other) -> ParserElement:
    """
    Implements the ``+`` operator, producing an :class:`And`. A string operand
    added to a :class:`ParserElement` is converted to a :class:`Literal`\\ by
    default (via ``_literalStringClass``).

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return And([self, operand])
    return NotImplemented

1647 

def __radd__(self, other) -> ParserElement:
    """
    Implements ``+`` for the case where the left operand is not a
    :class:`ParserElement`. ``... + expr`` becomes a :class:`SkipTo` up to
    ``expr`` followed by ``expr``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand + self
    return NotImplemented

1660 

def __sub__(self, other) -> ParserElement:
    """
    Implements the ``-`` operator: an :class:`And` with an error stop
    inserted between the two operands.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return self + And._ErrorStop() + operand
    return NotImplemented

1670 

def __rsub__(self, other) -> ParserElement:
    """
    Implements ``-`` for the case where the left operand is not a
    :class:`ParserElement`.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand - self
    return NotImplemented

1680 

def __mul__(self, other) -> ParserElement:
    """
    Implements the ``*`` operator, so that ``expr * 3`` can stand in for
    ``expr + expr + expr``. An expression may also be multiplied by a
    2-integer tuple, like the ``{min, max}`` multipliers of regular
    expressions. Tuples may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``
    """
    # rewrite the Ellipsis spellings into their tuple equivalents
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        required, optional = other, 0
    else:
        # pad/truncate to exactly (lower, upper), treating Ellipsis as None
        bounds = tuple(None if item is Ellipsis else item for item in other)
        lower, upper = (bounds + (None, None))[:2]
        if lower is None:
            lower = 0
        if not isinstance(lower, int):
            return NotImplemented

        if upper is None:
            # open-ended repetition
            if lower == 0:
                return ZeroOrMore(self)
            if lower == 1:
                return OneOrMore(self)
            return self * lower + ZeroOrMore(self)

        if not isinstance(upper, int):
            return NotImplemented
        required, optional = lower, upper - lower

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    if not optional:
        # exact repetition count
        return self if required == 1 else And([self] * required)

    def nested_optional(depth):
        # builds Opt(self + Opt(self + ...)) nested ``depth`` levels deep
        if depth > 1:
            return Opt(self + nested_optional(depth - 1))
        return Opt(self)

    if not required:
        return nested_optional(optional)
    if required == 1:
        return self + nested_optional(optional)
    return And([self] * required) + nested_optional(optional)

1760 

def __rmul__(self, other) -> ParserElement:
    """
    Implements ``*`` for the case where the left operand is not a
    :class:`ParserElement`; simply delegates to :meth:`__mul__`.
    """
    return self.__mul__(other)

1763 

def __or__(self, other) -> ParserElement:
    """
    Implements the ``|`` operator, producing a :class:`MatchFirst`.

    ``expr | ...`` produces a pending skip with ``must_skip=True``.

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented

1782 

def __ror__(self, other) -> ParserElement:
    """
    Implements ``|`` for the case where the left operand is not a
    :class:`ParserElement`.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand | self
    return NotImplemented

1792 

def __xor__(self, other) -> ParserElement:
    """
    Implements the ``^`` operator, producing an :class:`Or`.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return Or([self, operand])
    return NotImplemented

1802 

def __rxor__(self, other) -> ParserElement:
    """
    Implements ``^`` for the case where the left operand is not a
    :class:`ParserElement`.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand ^ self
    return NotImplemented

1812 

def __and__(self, other) -> ParserElement:
    """
    Implements the ``&`` operator, producing an :class:`Each`.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return Each([self, operand])
    return NotImplemented

1822 

def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`

    A string left operand is converted with the literal string class and
    the operation is then re-dispatched as ``other & self``; any operand
    that is not a :class:`ParserElement` after conversion yields
    ``NotImplemented``.
    """
    left = other
    if isinstance(left, str_type):
        left = self._literalStringClass(left)
    if isinstance(left, ParserElement):
        return left & self
    return NotImplemented

1832 

def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny`

    ``~expr`` is shorthand for ``NotAny(expr)``.
    """
    negated = NotAny(self)
    return negated

1838 

# Explicitly mark this class as non-iterable. Because __getitem__ is defined
# (for the repetition notation), iteration would otherwise fall back to the
# legacy protocol of calling __getitem__ with successive integer indexes;
# setting __iter__ to None disables that fallback.
__iter__ = None

1842 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` if more than two index arguments are given.

    .. versionchanged:: 3.1.0
       Support for slice notation.
    """

    # the NoMatch() placeholder is replaced whenever slice notation
    # supplies a stop_on expression
    has_terminator = False
    terminator = NoMatch()
    if isinstance(key, slice):
        # expr[count : stop_on]
        key, terminator = key.start, key.stop
        if key is None:
            key = ...
        has_terminator = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max : stop_on]
        key, terminator = (key[0], key[1].start), key[1].stop
        has_terminator = True

    # normalize single-argument keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    arg_count = len(key)
    if arg_count > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # delegate the repetition itself to the ``*`` operator
    repeated = self * tuple(key[:2])
    repeated = typing.cast(_MultipleMatch, repeated)

    if has_terminator:
        repeated.stopOn(terminator)

    return repeated

1904 

def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted (or ``None``), same as calling :class:`copy`.

    Example:

    .. testcode::

       # these are equivalent
       userdata = (
           Word(alphas).set_results_name("name")
           + Word(nums + "-").set_results_name("socsecno")
       )

       userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()

    return self._setResultsName(name)

1930 

def suppress(self) -> ParserElement:
    """
    Wraps this :class:`ParserElement` in a :class:`Suppress`, so that its output is
    suppressed; useful to keep punctuation from cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed

1937 

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turns on the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
       this implementation only sets the flag on the element itself

    Returns ``self``, so calls can be chained.
    """
    self.skipWhitespace = True
    return self

1947 

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turns off the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If ``True`` (the default), also disable whitespace skipping in child elements (if any);
       this implementation only clears the flag on the element itself

    Returns ``self``, so calls can be chained.
    """
    self.skipWhitespace = False
    return self

1958 

def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars for this element.

    Whitespace skipping is switched on, ``chars`` is stored as a ``set``, and
    ``copy_defaults`` is recorded in ``copyDefaultWhiteChars``.

    Returns ``self``, so calls can be chained.
    """
    whitespace = set(chars)
    self.skipWhitespace = True
    self.whiteChars = whitespace
    self.copyDefaultWhiteChars = copy_defaults
    return self

1969 

def parse_with_tabs(self) -> ParserElement:
    """
    Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    Sets the ``keepTabs`` flag and returns ``self``, so calls can be chained.
    """
    self.keepTabs = True
    return self

1978 

def ignore(self, other: ParserElement) -> ParserElement:
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in :class:`Suppress`. A :class:`Suppress` expression
    is added as-is unless it is already in the ignore list; any other
    expression is copied and wrapped in :class:`Suppress` before being added.

    Example:

    .. doctest::

       >>> patt = Word(alphas)[...]
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj']

       >>> patt = Word(alphas)[...].ignore(c_style_comment)
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj', 'lskjd']
    """
    ignorable = Suppress(other) if isinstance(other, str_type) else other

    if not isinstance(ignorable, Suppress):
        self.ignoreExprs.append(Suppress(ignorable.copy()))
    elif ignorable not in self.ignoreExprs:
        self.ignoreExprs.append(ignorable)
    return self

2006 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching.
    Any action passed as a falsy value is replaced by the corresponding
    default debug action. Calling this method also turns the debug flag on.

    :param start_action: method to be called when an expression is about to be parsed;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
       should have the signature::

          fn(input_string: str,
             start_location: int,
             end_location: int,
             expression: ParserElement,
             parsed_tokens: ParseResults,
             cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             exception: Exception,
             cache_hit: bool)
    """
    actions = self.DebugActions(
        start_action or _default_start_debug_action,  # type: ignore[truthy-function]
        success_action or _default_success_debug_action,  # type: ignore[truthy-function]
        exception_action or _default_exception_debug_action,  # type: ignore[truthy-function]
    )
    self.debugActions = actions
    self.debug = True
    return self

2050 

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions.

    Example:

    .. testcode::

       wd = Word(alphas).set_name("alphaword")
       integer = Word(nums).set_name("numword")
       term = wd | integer

       # turn on debugging for wd
       wd.set_debug()

       term[1, ...].parse_string("abc 123 xyz 890")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE

       Match alphaword at loc 0(1,1)
         abc 123 xyz 890
         ^
       Matched alphaword -> ['abc']
       Match alphaword at loc 4(1,5)
         abc 123 xyz 890
             ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
       Match alphaword at loc 8(1,9)
         abc 123 xyz 890
                 ^
       Matched alphaword -> ['xyz']
       Match alphaword at loc 12(1,13)
         abc 123 xyz 890
                     ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
       abc 123 xyz 890
                      ^
       Match alphaword failed, ParseException raised: Expected alphaword, found end of text ...

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :meth:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :meth:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # apply the flag to each expression yielded by visit_all(), one level at a time
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling debug installs the default actions (which also sets the debug flag)
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

2120 

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    The name is produced by ``_generateDefaultName()`` the first time it is
    needed and cached in ``_defaultName`` for later lookups.
    """
    if self._defaultName is not None:
        return self._defaultName
    self._defaultName = self._generateDefaultName()
    return self._defaultName

2126 

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The returned string is cached by the ``default_name`` property, so the
    method is only called again after the cached value has been cleared.
    """

2132 

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Define name for this expression, makes debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example:

    .. doctest::

       >>> integer = Word(nums)
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

       >>> integer.set_name("integer")
       integer
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # rebuild the error message so that it reflects the new display name
    self.errmsg = f"Expected {str(self)}"

    if __diag__.enable_debug_on_named_expressions:
        debug_wanted = name is not None
        self.set_debug(debug_wanted)

    return self

2167 

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined name when one has
    been set, otherwise the auto-generated default name.

    Assigning to this property is the same as calling ``set_name()``.
    """
    custom = self.customName
    return self.default_name if custom is None else custom

@name.setter
def name(self, new_name) -> None:
    self.set_name(new_name)

2178 

def __str__(self) -> str:
    """Return the display name of this expression (the ``name`` property)."""
    display_name = self.name
    return display_name

2181 

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text

2184 

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default
    name, so that it is regenerated the next time it is needed.

    Returns ``self``, so calls can be chained.
    """
    self._defaultName = None
    self.streamlined = True
    return self

2189 

def recurse(self) -> list[ParserElement]:
    """
    Return the sub-expressions of this element.

    This implementation always returns a new empty list.
    """
    return list()

2192 

def _checkRecursion(self, parseElementList):
    """
    Run the recursion check on every expression returned by ``recurse()``.

    Each sub-expression receives a new list made of ``parseElementList``
    followed by this element; the caller's list is not modified.
    """
    trail = parseElementList[:] + [self]
    for sub_expr in self.recurse():
        sub_expr._checkRecursion(trail)

2197 

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Emits a ``DeprecationWarning`` and then runs the recursion check starting
    from an empty trace; the ``validateTrace`` argument is not used.
    """
    deprecation_note = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_note, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

2212 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    :param file_or_filename: object with a ``read()`` method, or a path to open
    :param encoding: encoding used when a path has to be opened
    :param parse_all: flag to pass to :meth:`parse_string`; the deprecated
       ``parseAll`` keyword is still accepted and is OR-ed with this value
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    parse_all = parse_all or legacy_parse_all

    try:
        # first assume a file-like object...
        file_or_filename = typing.cast(TextIO, file_or_filename)
        file_contents = file_or_filename.read()
    except AttributeError:
        # ...and fall back to treating the argument as a path
        file_or_filename = typing.cast(str, file_or_filename)
        with open(file_or_filename, "r", encoding=encoding) as f:
            file_contents = f.read()

    try:
        return self.parse_string(file_contents, parse_all)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

2243 

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object always compares equal
    - a string compares equal when this expression matches it completely
      (``matches(other, parse_all=True)``)
    - another :class:`ParserElement` compares equal when both objects have
      equal instance attributes (``vars()``)
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2252 

def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    identity = id(self)
    return identity

2255 

def matches(self, test_string: str, parse_all: bool = True, **kwargs) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match
    :param parse_all: flag to pass to :meth:`parse_string` when running tests;
       the deprecated ``parseAll`` keyword is still accepted and is AND-ed with this value

    Returns ``True`` if parsing succeeds, ``False`` if a
    :class:`ParseBaseException` is raised.

    Example:

    .. doctest::

       >>> expr = Word(nums)
       >>> expr.matches("100")
       True
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", True)
    parse_all = parse_all and legacy_parse_all

    try:
        self.parse_string(str(test_string), parse_all=parse_all)
    except ParseBaseException:
        return False
    else:
        return True

2280 

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are legacy spellings of the arguments above; each is
    combined with its snake_case counterpart before use.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` pairs, one per test, in which ``result`` is either the
    :class:`ParseResults` or the exception that was raised

    Passing example:

    .. testcode::

       number_expr = pyparsing_common.number.copy()

       result = number_expr.run_tests('''
           # unsigned integer
           100
           # negative integer
           -100
           # float with scientific notation
           6.02e23
           # integer with scientific notation
           1e-12
           # negative decimal number without leading digit
           -.100
           ''')
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # unsigned integer
       100
       [100]

       # negative integer
       -100
       [-100]

       # float with scientific notation
       6.02e23
       [6.02e+23]

       # integer with scientific notation
       1e-12
       [1e-12]

       # negative decimal number without leading digit
       -.100
       [-0.1]
       Success

    Failure-test example:

    .. testcode::

       result = number_expr.run_tests('''
           # stray character
           100Z
           # too many '.'
           3.14.159
           ''', failure_tests=True)
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # stray character
       100Z
       100Z
          ^
       ParseException: Expected end of text, found 'Z' ...

       # too many '.'
       3.14.159
       3.14.159
           ^
       ParseException: Expected end of text, found '.' ...
       FAIL: Expected end of text, found '.' ...
       Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

       expr = Word(alphanums)[1,...]
       expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE
       :hide:


       this is a test\\n of strings that spans \\n 3 lines
       ['this', 'is', 'a', 'test', 'of', 'strings', 'that', 'spans', '3', 'lines']

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each legacy camelCase keyword with its snake_case counterpart;
    # only the merged values are used from here on
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a single multiline string: one test per (stripped) line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
    comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # converts a literal backslash-n marker in a test string to a real newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # collect comment lines (and blank lines that follow a comment) so that
        # they are shown above the next test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            # use the merged flag, so that the legacy ``parseAll`` keyword is honored
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # the callback added nothing: fall back to the normal
                        # dump, honoring full_dump like the other dump calls
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2522 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path: create/overwrite the file and write the HTML to it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    # we were passed a file-like object, just write to it
    output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2584 

# Compatibility synonyms
# Each legacy camelCase name below is bound to the result of replaced_by_pep8()
# for its snake_case replacement, so code written against the older API names
# still resolves.
# fmt: off
# these synonyms are additionally wrapped in staticmethod()
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# synonyms for instance methods
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# default_name is a property, so it is aliased directly rather than wrapped
defaultName = default_name
# fmt: on

2619 

2620 

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        # the expression that the '...' was attached to
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render as "<anchor> ..." by naming a throwaway "<anchor> + Empty()" expression
        placeholder_name = str(self.anchor + Empty())
        return placeholder_name.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # the pending '...' becomes a SkipTo targeting the expression being added
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing was actually skipped: drop the empty token and its results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the anchor did not match: report it as missing in the results
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never combined with a following expression cannot parse
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2660 

2661 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.

    The default name of a token is the name of its class.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        token_class = type(self)
        return token_class.__name__

2672 

2673 

class NoMatch(Token):
    """
    A token that will never match.

    Every parse attempt raises a :class:`ParseException` carrying the
    message ``"Unmatchable token"``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self._may_return_empty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2687 

2688 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example:

    .. doctest::

       >>> Literal('abc').parse_string('abc')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('abcdef')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('ab')
       Traceback (most recent call last):
       ParseException: Expected 'abc', found 'ab' (at char 0), (line: 1, col: 1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", **kwargs):
        # Performance tuning: select a subclass with optimized parseImpl.
        # Only direct Literal(...) calls are redirected; subclasses are
        # instantiated as requested.
        target_cls = cls
        if cls is Literal:
            legacy_match: str = deprecate_argument(kwargs, "matchString", "")

            text = legacy_match or match_string
            if not text:
                target_cls = Empty
            elif len(text) == 1:
                target_cls = _SingleCharLiteral

        return super().__new__(target_cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", **kwargs) -> None:
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")

        super().__init__()
        # the deprecated matchString keyword takes precedence when given
        match_string = legacy_match or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        # self.name is derived from self.match, so this must follow the assignment above
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before doing the full startswith() comparison
        first_char_ok = instring[loc] == self.firstMatchChar
        if first_char_ok and instring.startswith(self.match, loc):
            return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

2750 

2751 

class Empty(Literal):
    """
    An empty token; it matches at any location without consuming input.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # Both arguments are accepted but ignored; the literal is always "".
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed at ``loc`` and return an empty token list."""
        no_tokens: list = []
        return loc, no_tokens

2767 

2768 

class _SingleCharLiteral(Literal):
    """:class:`Literal` variant for one-character match strings; matching
    is a single character comparison.
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the single character at ``loc``.

        :raises ParseException: if the character at ``loc`` differs.
        """
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match


# Register Literal as the class stored in ParserElement._literalStringClass.
ParserElement._literalStringClass = Literal

2777 

2778 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("start")
       ParseResults(['start'], {})
       >>> Keyword("start").parse_string("starting")
       Traceback (most recent call last):
       ParseException: Expected Keyword 'start', keyword was immediately
       followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("starting").debug()
       Traceback (most recent call last):
       ParseException: Expected Keyword "start", keyword was immediately
       followed by keyword character, found 'ing' ...

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        **kwargs,
    ) -> None:
        matchString = deprecate_argument(kwargs, "matchString", "")
        identChars = deprecate_argument(kwargs, "identChars", None)

        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case throughout, so the
            # keyword and the identifier characters are both upper-cased
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.ident_chars = set(identChars)

    @property
    def identChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use ident_chars instead.

        Property returning the characters being used as keyword characters for this expression.
        """
        return self.ident_chars

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the keyword at ``loc``, enforcing keyword boundaries.

        The keyword text must be present at ``loc`` and must be neither
        immediately preceded nor immediately followed by a character from
        ``ident_chars``.

        Returns the location just past the keyword and the keyword string
        as it was defined (not as it appeared in the input).

        :raises ParseException: if the text does not match, or if it matches
            but is adjacent to a keyword character; in the latter case the
            message and error location identify the offending neighbor.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        # read the attribute directly rather than the deprecated property
        ident_chars = self.ident_chars
        match_len = self.matchLen

        if self.caseless:
            if instring[loc : loc + match_len].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in ident_chars:
                    if (
                        loc >= len(instring) - match_len
                        or instring[loc + match_len].upper() not in ident_chars
                    ):
                        return loc + match_len, self.match

                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + match_len
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and match_len == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in ident_chars:
                if (
                    loc >= len(instring) - match_len
                    or instring[loc + match_len] not in ident_chars
                ):
                    return loc + match_len, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + match_len
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2920 

2921 

class CaselessLiteral(Literal):
    """
    Token that matches a specified string without regard to letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example:

    .. doctest::

        >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", **kwargs) -> None:
        matchString: str = deprecate_argument(kwargs, "matchString", "")

        defining_text = matchString or match_string
        # the base Literal stores the upper-cased form used for comparison
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc`` by comparing upper-cased input to the stored text.

        Returns the location past the match and the literal as defined.

        :raises ParseException: if the upper-cased input slice differs.
        """
        end = loc + self.matchLen
        candidate = instring[loc:end].upper()
        if candidate != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2951 

2952 

class CaselessKeyword(Keyword):
    """
    Case-insensitive form of :class:`Keyword`.

    Example:

    .. doctest::

        >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self, match_string: str = "", ident_chars: typing.Optional[str] = None, **kwargs
    ) -> None:
        matchString: str = deprecate_argument(kwargs, "matchString", "")
        identChars: typing.Optional[str] = deprecate_argument(
            kwargs, "identChars", None
        )

        # a deprecated camelCase keyword, when given, wins over its replacement
        keyword_chars = identChars or ident_chars
        keyword_text = matchString or match_string
        super().__init__(keyword_text, keyword_chars, caseless=True)

2978 

2979 

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> patt = CloseMatch("ATCATCGAATGGA")
        >>> patt.parse_string("ATCATCGAAXGGA")
        ParseResults(['ATCATCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

        >>> patt.parse_string("ATCAXCGAAXGGA")
        Traceback (most recent call last):
        ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
        found 'ATCAXCGAAXGGA' (at char 0), (line:1, col:1)

        # exact match
        >>> patt.parse_string("ATCATCGAATGGA")
        ParseResults(['ATCATCGAATGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': []})

        # close match allowing up to 2 mismatches
        >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        >>> patt.parse_string("ATCAXCGAAXGGA")
        ParseResults(['ATCAXCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        caseless=False,
        **kwargs,
    ) -> None:
        maxMismatches: int = deprecate_argument(kwargs, "maxMismatches", 1)

        # an explicit max_mismatches (including 0) wins over the deprecated name
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match ``match_string`` at ``loc``, tolerating a bounded number of
        character mismatches.

        Returns the location just past the compared text and a
        :class:`ParseResults` holding the matched input text, with named
        results ``original`` (the match string) and ``mismatches`` (the
        positions within the match string that differed).

        :raises ParseException: if fewer than ``len(match_string)``
            characters remain, or more than ``maxMismatches`` characters
            differ.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches
            caseless = self.caseless

            for position, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(position)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The loop compared exactly len(match_string) characters, so
                # the match ends at maxloc. Using maxloc instead of the last
                # loop index also keeps an empty match_string from consuming
                # one character of input.
                results = ParseResults([instring[start:maxloc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return maxloc, results

        raise ParseException(instring, loc, self.errmsg, self)

3080 

3081 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

        # a word composed of digits
        integer = Word(nums)
        # Two equivalent alternate forms:
        Word("0123456789")
        Word(srange("[0-9]"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral
        # (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
        Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        initChars: typing.Optional[str] = deprecate_argument(kwargs, "initChars", None)
        bodyChars: typing.Optional[str] = deprecate_argument(kwargs, "bodyChars", None)
        asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        excludeChars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        # a deprecated camelCase keyword, when given, wins over its replacement
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.init_chars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no (remaining) body characters: body set is the initial set
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # exact overrides both bounds; min/max are rebound as well because
            # the regex construction below reads them
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # The deprecated ``initChars`` property returns a fresh copy of the
        # set on every access, so internal code reads ``init_chars`` directly.
        init_set = self.init_chars

        # see if we can make a regex for this Word
        if " " not in (init_set | self.bodyChars):
            if len(init_set) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(init_set)}]"

            if self.bodyChars == init_set:
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    # the leading fragment already consumes one character, so
                    # body repetition counts are one less than the word length
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # regex could not be built; keep the character-scanning parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    @property
    def initChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use `init_chars` instead.

        Property returning the initial chars to be used when matching this
        Word expression. If no body chars were specified, the initial characters
        will also be the body characters.
        """
        return set(self.init_chars)

    def copy(self) -> Word:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            ret.re_match = self.re_match
            # rebind so the copy's parseImpl is bound to the copy itself
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        def charsAsStr(s):
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        init_set = self.init_chars
        if init_set != self.bodyChars:
            base = f"W:({charsAsStr(init_set)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(init_set)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single character word: drop the leading "W:"
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Character-scanning matcher, used when no regex was installed.

        Returns the location past the word and the matched text.

        :raises ParseException: if the character at ``loc`` is not an initial
            character, the word is shorter than ``minLen``, a maximum was
            specified and the word is followed by another body character, or
            ``asKeyword`` is set and the word abuts a body character.
        """
        if instring[loc] not in self.init_chars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed by ``__init__`` when the word could
        be compiled to a regular expression.

        :raises ParseException: if the compiled regex does not match at ``loc``.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

3370 

3371 

class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    for matching any single character out of a string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        excludeChars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        # a deprecated camelCase keyword, when given, wins over its replacement
        keyword_flag = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset,
            exact=1,
            as_keyword=keyword_flag,
            exclude_chars=excluded,
        )

3395 

3396 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ for an
    explanation of the acceptable patterns and flags.

    Example:

    .. testcode::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept regular expressions compiled using the
        # re module
        import re
        parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        **kwargs,
    ) -> None:
        super().__init__()
        asGroupList: bool = deprecate_argument(kwargs, "asGroupList", False)
        asMatch: bool = deprecate_argument(kwargs, "asMatch", False)

        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first read
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled pattern (stdlib ``re`` or a compatible module)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # when both flags are set, as_match takes effect (it is assigned last)
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self) -> Regex:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Regex = cast(Regex, super().copy())
        # rebind so the copy's parseImpl is bound to the copy itself
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch  # type: ignore [method-assign]
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """
        Property returning the compiled regular expression for this Regex.

        The pattern is resolved on first access: a callable pattern is
        called, a string pattern is compiled with ``flags``, and a compiled
        pattern is used as-is. The may-return-empty flag is computed here
        for every kind of pattern unless it was already set.

        Generally only used internally by pyparsing.

        :raises ValueError: if a string pattern fails to compile.
        """
        if self._re:
            compiled = self._re
        else:
            if callable(self.pattern):
                # replace self.pattern with the value returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                # we got a compiled RE back instead of a str
                compiled = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = compiled.pattern
            else:
                try:
                    compiled = re.compile(self.pattern, self.flags)
                except re.error:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    )
                # a freshly compiled string pattern always recomputes the flag
                self._may_return_empty = None  # type: ignore [assignment]
            self._re = compiled

        if self._may_return_empty is None:
            # Precompiled patterns need this too: otherwise mayReturnEmpty
            # stays None and the past-end-of-string guard in parseImpl is
            # silently skipped.
            self._may_return_empty = compiled.match("", 0) is not None
        return compiled

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether the pattern can match the empty string; reading it
        forces the pattern to be resolved if the flag is not yet known.
        """
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        # show single backslashes in the name, as the pattern was written
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the regex at ``loc``; return the end location and a
        :class:`ParseResults` with the matched text plus any named groups.

        :raises ParseException: if the regex does not match at ``loc``.
        """
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Variant of :meth:`parseImpl` that returns ``result.groups()``."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Variant of :meth:`parseImpl` that returns the match object itself."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example:

        .. testcode::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))

        .. testoutput::

            <h1>main title</h1>

        :raises TypeError: if this Regex was built with ``as_group_list=True``,
            or with ``as_match=True`` and ``repl`` is a callable.
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:
            # tokens[0] is the match object; expand the template against it

            def pa(tokens):
                return tokens[0].expand(repl)

        else:
            # tokens[0] is the matched text; re-apply the pattern to substitute

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3620 

3621 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
       ``unquote_results`` is ``False``.

    Raises ``ValueError`` if ``quote_char`` or ``end_quote_char`` is empty
    after stripping whitespace, or if the generated pattern fails to compile.

    Example:

    .. doctest::

        >>> qs = QuotedString('"')
        >>> print(qs.search_string('lsjdf "This is the quote" sldjf'))
        [['This is the quote']]
        >>> complex_qs = QuotedString('{{', end_quote_char='}}')
        >>> print(complex_qs.search_string(
        ...     'lsjdf {{This is the "quote"}} sldjf'))
        [['This is the "quote"']]
        >>> sql_qs = QuotedString('"', esc_quote='""')
        >>> print(sql_qs.search_string(
        ...     'lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
        [['This is the quote with "embedded" quotes']]
    """

    # maps the two-character escape text to the whitespace character it denotes
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        **kwargs,
    ) -> None:
        super().__init__()
        # pre-PEP8 spellings of the arguments, pulled out of kwargs
        quoteChar: str = deprecate_argument(kwargs, "quoteChar", "")
        escChar: str = deprecate_argument(kwargs, "escChar", None)
        escQuote: str = deprecate_argument(kwargs, "escQuote", None)
        unquoteResults: bool = deprecate_argument(kwargs, "unquoteResults", True)
        endQuoteChar: typing.Optional[str] = deprecate_argument(
            kwargs, "endQuoteChar", None
        )
        convertWhitespaceEscapes: bool = deprecate_argument(
            kwargs, "convertWhitespaceEscapes", True
        )

        # merge old-style and new-style arguments; for the boolean flags a
        # False from either spelling wins
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of a multi-character end quote, as long
            # as it is not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally exclude line breaks
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                # NOTE: the numeric-escape fragment is a plain raw string, NOT
                # an f-string. Inside an f-string the quantifiers {3}, {2} and
                # {4} would be evaluated as replacement fields and turn into
                # the literal characters "3", "2" and "4".
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    r"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    @staticmethod
    def _convert_escaped_numerics(s: str) -> str:
        """Convert the text after the backslash of a numeric escape
        (``0``, three octal digits, ``xHH`` or ``uHHHH``) to the character it
        denotes; any other text is returned unchanged.
        """
        if s == "0":
            return "\0"
        if s.isdigit() and len(s) == 3:
            return chr(int(s, base=8))
        elif s.startswith(("u", "x")):
            return chr(int(s[1:], base=16))
        else:
            return s

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc`` and return ``(next_loc, text)``.

        ``text`` is the full match including delimiters, or, when
        ``unquote_results`` is set, the content with delimiters removed and
        escapes resolved. Raises :class:`ParseException` on no match.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            self.re_match(instring, loc)
            if instring[loc] == self.first_quote_char
            else None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                convert_escaped_numerics = self._convert_escaped_numerics
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3865 

3866 

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given
    exclusion set. Whitespace is matched like any other character unless
    it is listed in the exclusion set (see example).

    Constructed from a string of disallowed characters plus optional
    ``min``, ``max`` and ``exact`` lengths. ``min`` defaults to 1 and may
    not be lower; ``max`` and ``exact`` default to 0, meaning no maximum
    and no exact length respectively.

    Example:

    .. testcode::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(
            DelimitedList(csv_value).parse_string(
                "dkls,lsdkjf,s12 34,@!#,213"
            )
        )

    prints:

    .. testoutput::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self, not_chars: str = "", min: int = 1, max: int = 0, exact: int = 0, **kwargs
    ) -> None:
        super().__init__()
        # pre-PEP8 spelling of ``not_chars``
        notChars: str = deprecate_argument(kwargs, "notChars", "")

        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        collapsed = _collapse_string_to_ranges(self.notChars)
        # long exclusion sets are abbreviated to their first 13 characters
        if len(collapsed) <= 16:
            shown = self.notChars
        else:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # never scan past the maximum length or the end of the input
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit:
            if instring[loc] in excluded:
                break
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3949 

3950 

class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display labels used to build the default name of a White expression
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00a0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180e": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200a": "<HAIR_SPACE>",
        "\u200b": "<ZERO_WIDTH_SPACE>",
        "\u202f": "<NNBSP>",
        "\u205f": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws
        # the known whitespace characters that are NOT being matched stay
        # skippable whitespace for this expression
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        # self.leave_whitespace()
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        # characters without a predefined label (e.g. "\v") fall back to a
        # code-point label instead of raising KeyError
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a run of the configured whitespace characters at ``loc``.

        Returns ``(next_loc, matched_text)``; raises :class:`ParseException`
        if the first character is not a match character or the run is
        shorter than ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

4028 

4029 

class PositionToken(Token):
    """Base class for tokens that are flagged as able to return an empty
    match and as not raising ``IndexError`` while parsing.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

4035 

4036 

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; useful
    for scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorables and whitespace until the target column is reached
        or a non-whitespace character is found; return the new location.
        """
        target = self.col
        if col(loc, instring) != target:
            end = len(instring)
            if self.ignoreExprs:
                loc = self._skipIgnorables(instring, loc)
            while loc < end:
                if not instring[loc].isspace() or col(loc, instring) == target:
                    break
                loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the location of the target column and the text skipped to
        get there; fail if ``loc`` is already past that column.
        """
        current_col = col(loc, instring)
        if current_col > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        target_loc = loc + self.col - current_col
        return target_loc, instring[loc:target_loc]

4069 

4070 

class LineStart(PositionToken):
    r"""Matches when the current position is at the logical start of a line
    (after skipping whitespace) within the parse string.

    Example:

    .. testcode::

        test = '''\
        AAA this line
        AAA and this line
        AAA and even this line
        B AAA but definitely not this line
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['AAA', ' this line']
        ['AAA', ' and this line']
        ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before newline is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used only to skip non-newline whitespace
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        # when newline was originally whitespace, step over consecutive
        # newlines, skipping other whitespace after each one
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4123 

4124 

class LineEnd(PositionToken):
    """Matches when the current position is at the end of a line within
    the parse string: either at a newline character or at the end of the
    string.
    """

    def __init__(self) -> None:
        super().__init__()
        # newline must not be skipped as whitespace, since it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)

        # end of input counts as end of line, producing no token
        if loc == end:
            return loc + 1, []

        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"

        raise ParseException(instring, loc, self.errmsg, self)

4146 

4147 

class StringStart(PositionToken):
    """Matches when the current position is at the beginning of the parse
    string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeed at position 0, or at the position reached by pre-parsing
        # from 0 (i.e. everything before it is whitespace and ignorables)
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

4163 

4164 

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or beyond the end of ``instring``;
        raise :class:`ParseException` anywhere before it.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        # exactly at the end: step one past it; already beyond: stay put.
        # (loc < end, loc == end and loc > end are exhaustive, so no further
        # fall-through case exists.)
        if loc == end:
            return loc + 1, []
        return loc, []

4183 

4184 

class WordStart(PositionToken):
    r"""Matches when the current position is at the beginning of a
    :class:`Word`, i.e. the current character is in ``word_chars``
    (default= ``printables``) and the preceding character is not. To
    emulate the ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at the
    beginning of the string being parsed, or at the beginning of a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # pre-PEP8 spelling; it is used only if it was actually changed
        wordChars: str = deprecate_argument(kwargs, "wordChars", printables)
        if wordChars == printables:
            wordChars = word_chars

        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # position 0 always matches; elsewhere require a non-word character
        # before loc and a word character at loc
        if loc != 0 and (
            instring[loc - 1] in self.wordChars
            or instring[loc] not in self.wordChars
        ):
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, []

4211 

4212 

class WordEnd(PositionToken):
    r"""Matches when the current position is at the end of a
    :class:`Word`, i.e. the preceding character is in ``word_chars``
    (default= ``printables``) and the current character is not. To
    emulate the ``\b`` behavior of regular expressions, use
    ``WordEnd(alphanums)``. ``WordEnd`` will also match at the end of
    the string being parsed, or at the end of a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # pre-PEP8 spelling; it is used only if it was actually changed
        wordChars: str = deprecate_argument(kwargs, "wordChars", printables)
        if wordChars == printables:
            wordChars = word_chars

        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        length = len(instring)
        # at or past the end of the input always matches; elsewhere require a
        # word character before loc and a non-word character at loc
        if length > 0 and loc < length and (
            instring[loc] in self.wordChars
            or instring[loc - 1] not in self.wordChars
        ):
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, []

4240 

4241 

class Tag(Token):
    """
    A meta-element that inserts a named result into the parsed tokens,
    so that it can be checked later in a parse action or while
    processing the parsed results. Takes the tag name and an optional
    tag value, which defaults to `True`.

    Example:

    .. doctest::

        >>> end_punc = "." | ("!" + Tag("enthusiastic"))
        >>> greeting = "Hello," + Word(alphas) + end_punc

        >>> result = greeting.parse_string("Hello, World.")
        >>> print(result.dump())
        ['Hello,', 'World', '.']

        >>> result = greeting.parse_string("Hello, World!")
        >>> print(result.dump())
        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value

        self._may_return_empty = True
        self.mayIndexError = False
        self.leave_whitespace()

        # the tag is written into the results by a parse action
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

4283 

4284 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement for expressions built from a
    list of contained expressions (``self.exprs``), combining and
    post-processing their parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(savelist)
        self.exprs: list[ParserElement]

        # materialize generators up front so they can be inspected below
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # any strings in the sequence are wrapped as literal expressions
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all: treat it as a single expression
                self.exprs = [exprs]

        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """
        Append an expression to this ParseExpression's list of contained
        expressions, and reset the cached default name.
        """
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class; when ``recursive``
        is true, also invokes ``leave_whitespace`` on copies of all contained
        expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the originals are left untouched
            copies = [e.copy() for e in self.exprs]
            self.exprs = copies
            for e in copies:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class; when ``recursive``
        is true, also invokes ``ignore_whitespace`` on copies of all contained
        expressions.
        """
        super().ignore_whitespace(recursive)

        if recursive:
            # work on copies so the originals are left untouched
            copies = [e.copy() for e in self.exprs]
            self.exprs = copies
            for e in copies:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define an expression to be ignored (e.g., comments) while doing
        pattern matching; may be called repeatedly, to define multiple
        comment or other ignorable patterns. The ignorable is propagated to
        all contained expressions.
        """
        # a Suppress that is already registered is not added a second time
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable(candidate) -> bool:
            # a nested expression of the same class can be absorbed only if
            # it carries no parse actions, results name or debug flag
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if mergeable(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if mergeable(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self._may_return_empty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        tmp = trace[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression, with each contained expression
        copied as well.

        Generally only used internally by pyparsing.
        """
        clone = typing.cast(ParseExpression, super().copy())
        clone.exprs = [e.copy() for e in self.exprs]
        return clone

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        warning_kind = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and warning_kind not in self.suppress_warnings_
        ):
            # warn once, for the first contained expression that already has
            # a results name of its own
            named = next(
                (
                    e
                    for e in self.exprs
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and warning_kind not in e.suppress_warnings_
                ),
                None,
            )
            if named is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {named.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4469 

4470 

4471class And(ParseExpression): 

4472 """ 

4473 Requires all given :class:`ParserElement` s to be found in the given order. 

4474 Expressions may be separated by whitespace. 

4475 May be constructed using the ``'+'`` operator. 

4476 May also be constructed using the ``'-'`` operator, which will 

4477 suppress backtracking. 

4478 

4479 Example: 

4480 

4481 .. testcode:: 

4482 

4483 integer = Word(nums) 

4484 name_expr = Word(alphas)[1, ...] 

4485 

4486 expr = And([integer("id"), name_expr("name"), integer("age")]) 

4487 # more easily written as: 

4488 expr = integer("id") + name_expr("name") + integer("age") 

4489 """ 

4490 

class _ErrorStop(Empty):
    """Marker element placed in an ``And`` expression list; it never skips
    whitespace and is displayed as ``-``.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.leave_whitespace()

    def _generateDefaultName(self) -> str:
        # shown the same way the operator is written
        return "-"

4498 

def __init__(
    self,
    exprs_arg: typing.Iterable[Union[ParserElement, str]],
    savelist: bool = True,
) -> None:
    """Build the expression list from ``exprs_arg``.

    Strings are converted to literal expressions and each ``...`` element
    becomes a ``SkipTo`` for the element that follows it. Whitespace
    handling is copied from the first expression. Raises ``Exception`` if
    the sequence ends with ``...``.
    """
    # instantiate exprs as a list, converting strs to ParserElements
    exprs: list[ParserElement] = []
    for item in exprs_arg:
        exprs.append(self._literalStringClass(item) if isinstance(item, str) else item)

    # convert any Ellipsis elements to SkipTo
    if Ellipsis in exprs:

        # Ellipsis cannot be the last element
        if exprs[-1] is Ellipsis:
            raise Exception("cannot construct And with sequence ending in ...")

        # every element but the last, with each ... replaced by a SkipTo
        # targeting its successor
        exprs[:-1] = [
            SkipTo(following)("_skipped*") if current is Ellipsis else current
            for current, following in zip(exprs, exprs[1:])
        ]

    super().__init__(exprs, savelist)

    if not self.exprs:
        self._may_return_empty = True
    else:
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        first = self.exprs[0]
        if isinstance(first, White):
            # a leading White must see the whitespace itself
            self.skipWhitespace = False
        else:
            self.set_whitespace_chars(
                first.whiteChars,
                copy_defaults=first.copyDefaultWhiteChars,
            )
            self.skipWhitespace = first.skipWhitespace

    self.callPreparse = True

4539 

def streamline(self) -> ParserElement:
    """
    Collapse `And` expressions like `And(And(And(A, B), C), D)`
    to `And(A, B, C, D)`.

    .. doctest::

        >>> expr = Word("A") + Word("B") + Word("C") + Word("D")
        >>> # Using '+' operator creates nested And expression
        >>> expr
        {{{W:(A) W:(B)} W:(C)} W:(D)}
        >>> # streamline simplifies to a single And with multiple expressions
        >>> expr.streamline()
        {W:(A) W:(B) W:(C) W:(D)}

    Guards against collapsing out expressions that have special features,
    such as results names or parse actions.

    Resolves pending Skip commands defined using `...` terms, and links
    each ``IndentedBlock`` to the expression preceding it.
    """

    def has_trailing_skip(expr) -> bool:
        # True for a ParseExpression whose last element is a _PendingSkip
        return bool(
            isinstance(expr, ParseExpression)
            and expr.exprs
            and isinstance(expr.exprs[-1], _PendingSkip)
        )

    # collapse any _PendingSkip's
    if self.exprs and any(has_trailing_skip(e) for e in self.exprs[:-1]):
        deleted_expr_marker = NoMatch()
        for i, e in enumerate(self.exprs[:-1]):
            if e is deleted_expr_marker:
                continue
            if has_trailing_skip(e):
                # fold the following expression into the pending skip and
                # mark its old slot for removal
                e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                self.exprs[i + 1] = deleted_expr_marker
        self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

    super().streamline()

    # link any IndentedBlocks to the prior expression
    prev: ParserElement
    cur: ParserElement
    for prev, cur in zip(self.exprs, self.exprs[1:]):
        # traverse cur or any first embedded expr of cur looking for an IndentedBlock
        # (but watch out for recursive grammar)
        visited = set()
        while id(cur) not in visited:
            visited.add(id(cur))
            if isinstance(cur, IndentedBlock):
                # when prev matches, record its column on the block
                prev.add_parse_action(
                    lambda s, l, t, cur_=cur: setattr(
                        cur_, "parent_anchor", col(l, s)
                    )
                )
                break
            next_first = next(iter(cur.recurse()), None)
            if next_first is None:
                break
            cur = typing.cast(ParserElement, next_first)

    self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
    return self

4608 

def parseImpl(self, instring, loc, do_actions=True):
    """
    Match each contained expression in turn, starting at ``loc``.

    Returns the location after the last expression together with the
    accumulated tokens. Once an ``And._ErrorStop`` marker has been passed,
    a failure in any later expression is raised as a
    ``ParseSyntaxException``.
    """
    # the And itself has already been pre-parsed, so the first element is
    # told not to pre-parse again
    loc, resultlist = self.exprs[0]._parse(
        instring, loc, do_actions, callPreParse=False
    )

    past_error_stop = False
    for expr in self.exprs[1:]:
        if type(expr) is And._ErrorStop:
            past_error_stop = True
            continue

        if not past_error_stop:
            loc, exprtokens = expr._parse(instring, loc, do_actions)
        else:
            try:
                loc, exprtokens = expr._parse(instring, loc, do_actions)
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException._from_exception(pe)
            except IndexError:
                raise ParseSyntaxException(
                    instring, len(instring), self.errmsg, self
                )

        resultlist += exprtokens

    return loc, resultlist

4637 

def __iadd__(self, other):
    """
    Implement ``self += other`` by appending ``other`` to this expression.

    A string operand is first converted using ``_literalStringClass``;
    any operand that is not a ``ParserElement`` yields ``NotImplemented``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return self.append(operand)  # And([self, other])
    return NotImplemented

4644 

def _checkRecursion(self, parseElementList):
    """
    Run the recursion check on the contained expressions, passing along
    the given element list extended with this expression.

    The walk stops after the first expression that cannot return empty.
    """
    trail = parseElementList + [self]
    for expr in self.exprs:
        expr._checkRecursion(trail)
        if expr.mayReturnEmpty:
            continue
        break

4651 

def _generateDefaultName(self) -> str:
    """
    Build the default name: the space-separated names of the contained
    expressions wrapped in a single pair of braces.
    """
    inner = " ".join(str(e) for e in self.exprs)
    # peel away enclosing {}'s so that only one pair is added back below
    while len(inner) > 1 and inner[0] + inner[-1] == "{}":
        inner = inner[1:-1]
    return "{" + inner + "}"

4658 

4659 

class Or(ParseExpression):
    """Alternation that requires at least one of the given
    :class:`ParserElement` expressions to match. When two or more
    alternatives match, the one that matches the longest string is
    used. May be constructed using the ``'^'`` operator.

    Example:

    .. testcode::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints:

    .. testoutput::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """
        Streamline via the base class, then recompute the flags that are
        derived from the contained alternatives.
        """
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Try every alternative at ``loc`` and return the result of the one
        that matches the most input.

        Raises the furthest-reaching ``ParseFatalException`` if any
        alternative raised one, otherwise the ``ParseException`` with the
        greatest location, or a generic ``ParseException`` when there are
        no alternatives at all.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []

        if all(alt.callPreparse for alt in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: probe each alternative with try_parse
        for alt in self.exprs:
            try:
                probed_end = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                fatals.append(pfe)
                # a fatal error supersedes any ordinary failure seen so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # remember every match, to be retried from longest to shortest
                candidates.append((probed_end, alt))

        if candidates:
            # second pass: revisit the matches in descending order of match
            # length, since attached actions might change whether or how much
            # of the input they match
            candidates.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # with no conditions or parse actions to alter the choice,
                # the first (longest) match is the best match
                return candidates[0][1]._parse(instring, loc, do_actions)

            best_loc = -1
            best_toks: typing.Optional[ParseResults] = None
            for probed_end, alt in candidates:
                if probed_end <= best_loc:
                    # a longer match is already in hand than this one can give
                    return best_loc, best_toks

                try:
                    actual_end, toks = alt._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_end >= probed_end:
                        return actual_end, toks
                    # matched less than the probe did; keep it if it is the
                    # longest fallback seen so far
                    if actual_end > best_loc:
                        best_loc, best_toks = actual_end, toks

            if best_loc != -1:
                return best_loc, best_toks

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer parser element name
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # if every alternative failed right at the starting position, report
        # this expression's collective message rather than a single one
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ixor__(self, other):
        """Implement ``self ^= other`` by appending ``other`` as an alternative."""
        operand = self._literalStringClass(other) if isinstance(other, str_type) else other
        if isinstance(operand, ParserElement):
            return self.append(operand)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " ^ ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """
        Set the results name, first emitting the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic when it is
        enabled, not suppressed, and any alternative is an ``And``.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4820 

4821 

class MatchFirst(ParseExpression):
    """Alternation that requires at least one of the given
    :class:`ParserElement` expressions to match. When more than one
    expression matches, the first one listed is the one that is used.
    May be constructed using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

        # watch the order of expressions to match
        >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        >>> print(number.search_string("123 3.1416 789")) # Fail!
        [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        >>> print(number.search_string("123 3.1416 789")) # Better
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """
        Streamline via the base class (once only), then recompute the
        flags that are derived from the contained alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` from any alternative is re-raised
        immediately. If every alternative fails, the ``ParseException``
        with the greatest location is raised, or a generic one when there
        are no alternatives at all.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # if every alternative failed right at the starting position, report
        # this expression's collective message rather than an individual one
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Implement ``self |= other`` by appending ``other`` as an alternative."""
        operand = self._literalStringClass(other) if isinstance(other, str_type) else other
        if isinstance(operand, ParserElement):
            return self.append(operand)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " | ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """
        Set the results name, first emitting the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic when it is
        enabled, not suppressed, and any alternative is an ``And``.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4932 

4933 

class Each(ParseExpression):
    """Requires every one of the given :class:`ParserElement` s to be
    found, in any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example:

    .. testcode::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: 'BLACK'
        - posn: ['100', ',', '120']
          - x: '100'
          - y: '120'
        - shape: 'SQUARE'
        ...

        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
         'posn:', ['50', ',', '80']]
        - color: 'BLUE'
        - posn: ['50', ',', '80']
          - x: '50'
          - y: '80'
        - shape: 'CIRCLE'
        - size: '50'
        ...

        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
         'posn:', ['20', ',', '40']]
        - color: 'GREEN'
        - posn: ['20', ',', '40']
          - x: '20'
          - y: '40'
        - shape: 'TRIANGLE'
        - size: '20'
        ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        # all() over no expressions is True, which covers the empty case too
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``self &= other`` by appending ``other`` to this expression."""
        operand = self._literalStringClass(other) if isinstance(other, str_type) else other
        if isinstance(operand, ParserElement):
            return self.append(operand)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """
        Streamline via the base class, then recompute whether this
        expression may return empty.
        """
        super().streamline()
        # all() over no expressions is True, which covers the empty case too
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the contained expressions in whatever order they occur.

        The order is first discovered with repeated ``try_parse`` rounds,
        then the expressions are parsed for real in that order. Raises the
        furthest-reaching ``ParseFatalException`` left from the final
        round, or a ``ParseException`` naming the required expressions
        that were never matched.
        """
        if self.initExprGroups:
            # one-time sorting of the sub-expressions into groups, kept on
            # the instance for later calls
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            wrapped_by_opt = [e.expr for e in self.exprs if isinstance(e, Opt)]
            may_be_empty = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = wrapped_by_opt + may_be_empty
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = list(self.required)
        pending_optional = list(self.optionals)
        repeatables = list(self.multioptionals)
        match_order: list[ParserElement] = []
        fatals: list[ParseFatalException] = []

        # keep making rounds over the outstanding expressions until a whole
        # round goes by in which none of them matches
        while True:
            round_exprs = pending_required + pending_optional + repeatables
            failed: list[ParserElement] = []
            fatals = []
            for e in round_exprs:
                try:
                    probe_loc = e.try_parse(instring, probe_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the enclosing Opt, if this was an Opt's inner expression
                    match_order.append(self.opt1map.get(id(e), e))
                    if e in pending_required:
                        pending_required.remove(e)
                    elif e in pending_optional:
                        pending_optional.remove(e)
            if len(failed) == len(round_exprs):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer parser element name
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join([str(e) for e in pending_required])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        total_results = ParseResults([])
        for e in match_order:
            loc, results = e._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        joined = " & ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

5117 

5118 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, used for wrapping a
    single expression in order to combine and post-process its parsed
    tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a plain string to a parser element
            expr_str = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(expr_str)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(expr_str)
            else:
                expr = literal_cls(Literal(expr_str))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # take on the parsing attributes of the wrapped expression
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """
        Parse with the contained expression.

        Raises ``ParseException`` when no expression is defined. Any other
        parse exception from the contained expression is re-raised after
        its missing string, location and parser element are filled in.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            # a custom-named, non-Forward element reports its own message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class; when ``recursive``
        is set, also invokes ``leave_whitespace`` on a copy of the contained
        expression.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class; when ``recursive``
        is set, also invokes ``ignore_whitespace`` on a copy of the contained
        expression.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define an expression to be ignored (e.g., comments) while doing
        pattern matching; may be called repeatedly, to define multiple
        comment or other ignorable patterns. The most recently added
        ignore expression is also passed on to the contained expression.
        """
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        super().streamline()
        inner = self.expr
        if inner is not None:
            inner.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        if self.expr is None:
            return
        self.expr._checkRecursion(parseElementList + [self])

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        extended_trace = trace + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5240 

5241 

class IndentedBlock(ParseElementEnhance):
    """
    Expression that matches one or more repetitions of an expression, all
    at the same indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).

    Example:

    .. testcode::

        '''
        BNF:
        statement ::= assignment_stmt | if_stmt
        assignment_stmt ::= identifier '=' rvalue
        rvalue ::= identifier | integer
        if_stmt ::= 'if' bool_condition block
        block ::= ([indent] statement)...
        identifier ::= [A..Za..z]
        integer ::= [0..9]...
        bool_condition ::= 'TRUE' | 'FALSE'
        '''

        IF, TRUE, FALSE = Keyword.using_each("IF TRUE FALSE".split())

        statement = Forward()
        identifier = Char(alphas)
        integer = Word(nums).add_parse_action(lambda t: int(t[0]))
        rvalue = identifier | integer
        assignment_stmt = identifier + "=" + rvalue

        if_stmt = IF + (TRUE | FALSE) + IndentedBlock(statement)

        statement <<= Group(assignment_stmt | if_stmt)

        result = if_stmt.parse_string('''
            IF TRUE
                a = 1000
                b = 2000
                IF FALSE
                    z = 100
            ''')
        print(result.dump())

    .. testoutput::

        ['IF', 'TRUE', [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]]
        [0]:
          IF
        [1]:
          TRUE
        [2]:
          [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]
          [0]:
            ['a', '=', 1000]
          [1]:
            ['b', '=', 2000]
          [2]:
            ['IF', 'FALSE', [['z', '=', 100]]]
            [0]:
              IF
            [1]:
              FALSE
            [2]:
              [['z', '=', 100]]
              [0]:
                ['z', '=', 100]
    """

    class _Indent(Empty):
        # empty match that succeeds only at exactly the reference column
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # empty match that succeeds only to the right of the reference column
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing construct; starts at 1 and may be reassigned
        # from outside once the block is placed in a grammar
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Parse a run of ``self.expr`` matches that all start at the column of
        the first non-whitespace character at or after ``loc``.
        """
        # an Empty() is used to advance past whitespace; the resulting
        # position fixes the column for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # if self.expr does not match here this raises, and there is
        # nothing further to do
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        full_expr = body + Optional(trailing_undent)
        return full_expr.parseImpl(instring, anchor_loc, do_actions)

5365 

5366 

class AtStringStart(ParseElementEnhance):
    """Matches the given expression only at the very beginning of the
    parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string(" 123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

5386 

5387 

class AtLineStart(ParseElementEnhance):
    r"""Matches the given expression only at the beginning of a line
    within the parse string

    Example:

    .. testcode::

        test = '''\
        BBB this line
        BBB and this line
          BBB but not this one
        A BBB and definitely not this one
        '''

        for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['BBB', ' this line']
        ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # column numbers are 1-based, so column 1 is the start of a line
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

5422 

5423 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string; it only verifies that the specified parse
    expression matches at the current position. ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example:

    .. testcode::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(
            label + Suppress(':')
            + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
        )

        attr_expr[1, ...].parse_string(
            "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # parse with self.expr, then empty the token list of the returned
        # ParseResults in place; the emptied results object is what gets
        # returned, together with the unchanged location
        _end, lookahead = self.expr._parse(instring, loc, do_actions=do_actions)
        del lookahead[:]
        return loc, lookahead

5466 

5467 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string; it only verifies that the specified parse expression
    matches prior to the current position. ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False

        # work out whether the lookbehind distance is implied by the
        # expression itself; None means the caller's retreat value is used
        implied: typing.Optional[int]
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            implied = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            implied = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            implied = expr.maxLen
        elif isinstance(expr, PositionToken):
            implied = 0
        else:
            implied = None

        self.exact = implied is not None
        self.retreat = retreat if implied is None else implied
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # a parse action empties the token list of every match
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """
        Verify that the expression matches in the text before ``loc``.

        Returns ``loc`` unchanged together with the lookbehind results;
        raises a parse exception when no lookbehind match is found.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat is a maximum lookbehind window: try successively earlier
        # starting points, requiring each match to end where the window ends
        test_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_failure: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )

        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = test_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                last_failure = pbe
            else:
                return loc, ret

        raise last_failure

5548 

5549 

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression and bracket its tokens with locations."""
        begin = loc
        end, matched = self.expr._parse(instring, begin, do_actions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end

        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]

5593 

5594 

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # Turn off whitespace skipping on this element only; calling
        # self.leave_whitespace() is deliberately avoided so the setting is
        # not propagated to the contained expressions.
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens unless the wrapped expression can parse at ``loc``."""
        unwanted_found = self.expr.can_parse_next(instring, loc, do_actions=do_actions)
        if not unwanted_found:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"

5638 

5639 

class _MultipleMatch(ParseElementEnhance):
    """Repetition of an expression, with an optional stop-on sentinel.

    Parses the wrapped expression once, then keeps parsing it until it no
    longer matches.  When a sentinel is configured, it is checked before each
    attempt.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pull the pre-PEP8 spelling of the argument out of kwargs
        legacy_stop_on: typing.Optional[Union[ParserElement, str]] = (
            deprecate_argument(kwargs, "stopOn", None)
        )

        super().__init__(expr)
        sentinel = legacy_stop_on or stop_on
        self.saveAsList = True
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stop_on(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel expression; returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated, so that successfully parsing it means "sentinel not here"
        self.not_ender = None if ender is None else ~ender
        return self

    stopOn = stop_on

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        # bound check for the sentinel, or None when no sentinel was configured
        reject_sentinel = (
            self.not_ender.try_parse if self.not_ender is not None else None
        )

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)

        # any further repetitions are optional: the first failure ends the loop
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # Optionally warn when a contained expression already carries a
        # results name; only the first such expression is reported.
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            candidates = [self.expr] + self.expr.recurse()
            for contained in candidates:
                if not isinstance(contained, ParserElement):
                    continue
                if not contained.resultsName:
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in contained.suppress_warnings_
                ):
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {contained.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                break

        return super()._setResultsName(name, list_all_matches)

5720 

5721 

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word).set_parse_action(' '.join))

        >>> text = "shape: SQUARE posn: upper left color: BLACK"

        # Fail! read 'posn' as data instead of next label
        >>> attr_expr[1, ...].parse_string(text).pprint()
        [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore
        # to avoid reading label string as part of the data
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(
        ...         data_word, stop_on=label).set_parse_action(' '.join))
        >>> OneOrMore(attr_expr).parse_string(text).pprint()  # Better
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        >>> (attr_expr * (1,)).parse_string(text).pprint()
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        # rendered as the wrapped expression in braces, followed by an ellipsis
        return "{" + str(self.expr) + "}..."

5765 

5766 

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pull the pre-PEP8 spelling of the argument out of kwargs
        legacy_stop_on: typing.Optional[Union[ParserElement, str]] = (
            deprecate_argument(kwargs, "stopOn", None)
        )

        super().__init__(expr, stop_on=legacy_stop_on or stop_on)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Run the repetition; a failure to match becomes an empty result at ``loc``."""
        try:
            outcome = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        # rendered as the wrapped expression in brackets, followed by an ellipsis
        return "[" + str(self.expr) + "]..."

5800 

5801 

class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example:

    .. doctest::

        >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
        ParseResults(['aa', 'bb', 'cc'], {})
        >>> DelimitedList(Word(hexnums), delim=':', combine=True
        ...     ).parse_string("AA:BB:CC:DD:EE")
        ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the requested bounds before building anything
        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        # delimiters are kept only when the whole list is combined into one token
        self.delim = delim if combine else Suppress(delim)
        self.combine = combine
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one item, then (delimiter + item) repeated within the remaining bounds
        repeat_bounds = (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        list_expr = self.content + (self.delim + self.content) * repeat_bounds
        if self.allow_trailing_delim:
            list_expr += Opt(self.delim)

        if self.combine:
            list_expr = Combine(list_expr)

        super().__init__(list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        item = self.content.streamline()
        return f"{item} [{self.raw_delim} {item}]..."

5873 

5874 

5875class _NullToken: 

5876 def __bool__(self): 

5877 return False 

5878 

5879 def __str__(self): 

5880 return "" 

5881 

5882 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    :param expr: expression that may match zero or one time
    :param default: (optional) - value to be returned
        if the optional expression is not found.

    Example:

    .. testcode::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
        98765-
             ^
        ParseException: Expected end of text, found '-'  (at char 5), (line:1, col:6)
        FAIL: Expected end of text, found '-'  (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        # mirror the list-saving behavior of the wrapped expression
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, falling back to the default on failure."""
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                # no default configured: contribute nothing
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                # publish the default under the wrapped expression's results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        text = str(self.expr)
        # strip off redundant inner {}'s
        while len(text) > 1 and text.startswith("{") and text.endswith("}"):
            text = text[1:-1]
        return "[" + text + "]"


# alias bound to the same class
Optional = Opt

5967 

5968 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param other: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
        (the skipped text and target expression are returned
        as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
        (typically quoted strings and comments)
        that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
        are not allowed to be included in the skipped text;
        if found before the target expression is found,
        the :class:`SkipTo` is not a match

    Example:

    .. testcode::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints:

    .. testoutput::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pull the pre-PEP8 spelling of the argument out of kwargs
        failOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "failOn", None
        )

        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper element whose ignore list is used to step over ignorable text
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.

        Returns ``self`` so that calls can be chained.
        """
        super().ignore(expr)
        self._update_ignorer()
        # returning the element (instead of the implicit None) keeps
        # ``SkipTo(...).ignore(...)`` usable as an expression
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` for the target expression.

        Returns the location where scanning stopped and a ``ParseResults``
        holding the skipped text (plus the target's tokens when
        ``includeMatch`` is set).  Raises ``ParseException`` when the end of
        the input is passed without the target matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): ``break`` bypasses the loop's ``else`` clause, so
                # control continues to "build up return values" below rather than
                # raising - confirm this is the intended fail_on behavior.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: actions are not run while searching
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real and append its tokens
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

6133 

6134 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    instance using the ``'<<'`` operator.

    .. Note::

       Take care when assigning to ``Forward`` not to overlook
       precedence of operators.

       Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

          fwd_expr << a | b | c

       will actually be evaluated as::

          (fwd_expr << a) | b | c

       thereby leaving b and c out as parseable alternatives.
       It is recommended that you explicitly group the values
       inserted into the :class:`Forward`::

          fwd_expr << (a | b | c)

       Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :meth:`ParseResults.pprint` for an example of a recursive
    parser created using :class:`Forward`.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember where this Forward was created; __del__ reports this
        # location when no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the expression of this Forward; returns ``self``."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Drop the creation frame only after ``other`` has been accepted: a
        # rejected operand leaves ``expr`` unset, and __del__ reads
        # ``caller_frame`` in exactly that state.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # remembered so __or__ can detect ``fwd << a | b`` written on one line
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        caller_line = traceback.extract_stack(limit=2)[-2]
        # warn when '|' is applied on the same source line as the last '<<'
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Turn off whitespace skipping on this ``Forward`` and return ``self``.

        ``recursive`` is accepted but not used by this implementation.
        """
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Turn on whitespace skipping on this ``Forward`` and return ``self``.

        ``recursive`` is accepted but not used by this implementation.
        """
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        if not self.streamlined:
            # set the flag before descending so a self-referencing grammar
            # does not streamline this element again
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        if self.expr is not None:
            return super().copy()
        else:
            # no expression attached yet: return a new Forward that refers
            # back to this one
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

6416 

6417 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.

    The converters defined after it in this module (:class:`Combine`,
    :class:`Group`, :class:`Dict`, :class:`Suppress`) derive from this class
    and override ``postParse`` to reshape the matched tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``savelist`` is accepted but unused here: it is neither forwarded to
        # the base class nor stored on the instance.
        super().__init__(expr)  # , savelist)
        # converters do not save results as a list by default; Group and Dict
        # set this flag to True in their own __init__
        self.saveAsList = False

6426 

6427 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example:

    .. doctest::

        >>> real = Word(nums) + '.' + Word(nums)
        >>> print(real.parse_string('3.1416'))
        ['3', '.', '1416']

        >>> # will also erroneously match the following
        >>> print(real.parse_string('3. 1416'))
        ['3', '.', '1416']

        >>> real = Combine(Word(nums) + '.' + Word(nums))
        >>> print(real.parse_string('3.1416'))
        ['3.1416']

        >>> # no match when there are internal spaces
        >>> print(real.parse_string('3. 1416'))
        Traceback (most recent call last):
        ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.skipWhitespace = True
        self.callPreparse = True
        self.adjacent = adjacent
        # the camelCase keyword takes precedence whenever it is supplied
        self.joinString = join_string if joinString is None else joinString

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            # adjacent mode calls the ParserElement implementation directly,
            # skipping any override in the intermediate base classes
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from an emptied copy of the incoming results
        combined = tokenlist.copy()
        del combined[:]

        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

6497 

6498 

class Group(TokenConverter):
    """Converter that wraps the matched tokens in a nested list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    When the optional ``aslist`` argument is True, the grouped tokens are
    returned as a Python list instead of a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> ident = Word(alphas)
        >>> num = Word(nums)
        >>> term = ident | num
        >>> func = ident + Opt(DelimitedList(term))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', 'a', 'b', '100']

        >>> func = ident + Group(Opt(DelimitedList(term)))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # default behaviour: nest the results one level deeper
        if not self._asPythonList:
            return [tokenlist]

        # aslist=True: convert to a plain list and wrap it in ParseResults.List
        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.as_list()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

6536 

6537 

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict instead of a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')

        >>> attr_expr = (
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join)
        ... )

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> # print attributes as plain groups
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
        # Dict will auto-assign names.
        >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        # access named fields as dict entries, or output as dict
        >>> print(result['shape'])
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for position, entry in enumerate(tokenlist):
            # empty entries contribute no key
            if len(entry) == 0:
                continue

            # the first item of each entry is the key; int keys become strings
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if len(entry) == 1:
                # key only: store an empty string as its value
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                # drop the key, leaving only the value tokens
                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

6636 

6637 

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example:

    .. doctest::

        >>> source = "a, b, c,d"
        >>> wd = Word(alphas)
        >>> wd_list1 = wd + (',' + wd)[...]
        >>> print(wd_list1.parse_string(source))
        ['a', ',', 'b', ',', 'c', ',', 'd']

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        >>> wd_list2 = wd + (Suppress(',') + wd)[...]
        >>> print(wd_list2.parse_string(source))
        ['a', 'b', 'c', 'd']

        # Skipped text (using '...') can be suppressed as well
        >>> source = "lead in START relevant text END trailing text"
        >>> start_marker = Keyword("START")
        >>> end_marker = Keyword("END")
        >>> find_body = Suppress(...) + start_marker + ... + end_marker
        >>> print(find_body.parse_string(source))
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` stands for "skip ahead": wrap a pending skip that is
        # resolved once the following expression is known (see __add__/__sub__)
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # pending skip: suppress everything up to ``other``, then match it
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # same as __add__, but combined with the ``-`` operator
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # matched tokens are dropped entirely
        return []

    def suppress(self) -> ParserElement:
        # already suppressed; no further wrapping
        return self

6690 

6691 

6692# XXX: Example needs to be re-done for updated output 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``. When the parse action completes, it writes
    ``"<<leaving method-name (ret: <returned value>)"``, or
    ``"<<leaving method-name (exception: <type>: <message>)"`` if the parse
    action raised, in which case the exception is re-raised.

    Example:

    .. testsetup:: stderr

       import sys
       sys.stderr = sys.stdout

    .. testcleanup:: stderr

       sys.stderr = sys.__stderr__

    .. testcode:: stderr

       wd = Word(alphas)

       @trace_parse_action
       def remove_duplicate_chars(tokens):
           return ''.join(sorted(set(''.join(tokens))))

       wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
       print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
       :options: +NORMALIZE_WHITESPACE

       >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
       0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
       <<leaving remove_duplicate_chars (ret: 'dfjkls')
       ['dfjkls']

    .. versionchanged:: 3.1.0
       Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]

        # a fourth leading argument is treated as the owning instance, so the
        # reported name is qualified with its class name
        if len(paArgs) > 3:
            label = f"{type(paArgs[0]).__name__}.{f.__name__}"
        else:
            label = f.__name__

        sys.stderr.write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {outcome!r})\n")
            return outcome

    z.__name__ = f.__name__
    return z

6756 

6757 

# convenience constants for positional expressions
# Each is a single module-level instance of the corresponding class, given a
# fixed display name via set_name().
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

6764 

# Grammar used by srange() to parse a regex-style "[...]" character set.

# backslash followed by one punctuation character: yields that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape (\x##, \X## or legacy \0x##): yields the character with that code.
# The digits are taken as everything after the first "x" of the lowercased
# match. str.lstrip(r"\0x") must not be used here: it strips a *set* of
# characters, so it leaves the "X" of "\X41" in place and reduces "\x00" to
# an empty string, both of which make int(..., 16) raise ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# octal escape (\0 followed by octal digits): yields the character with that code
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one set member: an escape of any kind, or any single char except "\" and "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as a [first, last] pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# complete bracket expression: "[", optional "^" (named "negate"),
# one or more ranges/single chars (named "body"), then "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6784 

6785 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, an empty string is
    returned instead of raising.
    """

    def _expanded(p):
        # plain characters pass through unchanged
        if not isinstance(p, ParseResults):
            yield p
            return
        # a grouped [first, last] pair expands to every character in between
        yield from map(chr, range(ord(p[0]), ord(p[1]) + 1))

    try:
        chars = []
        for part in _reBracketExpr.parse_string(s).body:
            chars.extend(_expanded(part))
        return "".join(chars)
    except Exception:
        # any failure yields an empty result
        return ""

6825 

6826 

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action takes ``(s, l, t)`` and is named after
    ``func`` (or after its class when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        # apply func (plus any extra args) to each token in order
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # name the action after the mapped callable; the class name is the
    # fallback for callables that carry no __name__ of their own
    fallback_name = getattr(func, "__class__").__name__
    pa.__name__ = getattr(func, "__name__", fallback_name)

    return pa

6871 

6872 

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable in the caller's frame that holds a
    :class:`ParserElement` without a custom name is named after the
    variable. Does nothing when ``sys._getframe`` is unavailable.
    """

    # guard against _getframe not being implemented in the current Python
    if hasattr(sys, "_getframe"):
        caller = sys._getframe(1)
    else:
        caller = None
    if caller is None:
        return

    # find all locals in the calling frame that are ParserElements
    caller = typing.cast(types.FrameType, caller)
    for var_name, candidate in caller.f_locals.items():
        if not isinstance(candidate, ParserElement):
            continue
        # elements that already carry a custom name are left untouched
        if candidate.customName:
            continue
        candidate.set_name(var_name)

6891 

6892 

# Double-quoted string on a single line: the body may contain any characters
# other than '"', newline, carriage return and backslash, plus doubled quotes
# ("") and backslash escapes (\x must be followed by hex digits).
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Same as dbl_quoted_string, using single quotes.
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either form above; the two alternatives are rebuilt here (not reused) so
# each one carries its own short name.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Python-style string literals: triple-quoted (either quote character) or
# single-line, with \" / \' as the quote escape instead of doubled quotes.
# The four alternatives are combined with the ``^`` operator.
# NOTE(review): re.MULTILINE only changes how ^ and $ match, and these
# patterns use neither; confirm whether re.DOTALL was intended so that
# ``\\.`` can also cover a backslash followed by a newline.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A quoted string (copy of quoted_string) immediately preceded by "u".
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")

6928 

6929 

# Character sets built with srange() from legacy \0x## hex escapes:
# alphas8bit spans 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff;
# punc8bit spans 0xa1-0xbf plus 0xd7 and 0xf7.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement instance bound in the module namespace at
# this point in the file)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6938 

# Compatibility synonyms
# camelCase names kept alongside the snake_case ones. The expression aliases
# are plain rebinding of the same objects; the function aliases are created
# with replaced_by_pep8 (imported from .util).
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on