Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 46%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2723 statements  

1# 

2# core.py 

3# 

4from __future__ import annotations 

5 

6import collections.abc 

7from collections import deque 

8import os 

9import typing 

10from typing import ( 

11 Any, 

12 Callable, 

13 Generator, 

14 NamedTuple, 

15 Sequence, 

16 TextIO, 

17 Union, 

18 cast, 

19) 

20from abc import ABC, abstractmethod 

21from enum import Enum 

22import string 

23import copy 

24import warnings 

25import re 

26import sys 

27from collections.abc import Iterable 

28import traceback 

29import types 

30from operator import itemgetter 

31from functools import wraps 

32from threading import RLock 

33from pathlib import Path 

34 

35from .warnings import PyparsingDeprecationWarning, PyparsingDiagnosticWarning 

36from .util import ( 

37 _FifoCache, 

38 _UnboundedCache, 

39 __config_flags, 

40 _collapse_string_to_ranges, 

41 _convert_escaped_numerics_to_char, 

42 _escape_regex_range_chars, 

43 _flatten, 

44 LRUMemo as _LRUMemo, 

45 UnboundedMemo as _UnboundedMemo, 

46 deprecate_argument, 

47 replaced_by_pep8, 

48) 

49from .exceptions import * 

50from .actions import * 

51from .results import ParseResults, _ParseResultsWithOffset 

52from .unicode import pyparsing_unicode 

53 

# sys.maxsize: the largest value a Py_ssize_t index can hold on this platform
_MAX_INT = sys.maxsize
# the built-in text and binary string types, grouped as a tuple
str_type: tuple[type, ...] = (str, bytes)

56 

57# 

58# Copyright (c) 2003-2022 Paul T. McGuire 

59# 

60# Permission is hereby granted, free of charge, to any person obtaining 

61# a copy of this software and associated documentation files (the 

62# "Software"), to deal in the Software without restriction, including 

63# without limitation the rights to use, copy, modify, merge, publish, 

64# distribute, sublicense, and/or sell copies of the Software, and to 

65# permit persons to whom the Software is furnished to do so, subject to 

66# the following conditions: 

67# 

68# The above copyright notice and this permission notice shall be 

69# included in all copies or substantial portions of the Software. 

70# 

71# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

72# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

73# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

74# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

75# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

76# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

77# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

78# 

79 

80from functools import cached_property 

81 

82 

class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # NOTE(review): presumably a label the __config_flags base uses in its
    # messages - confirm in util.py
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # Snapshot of every public (non-underscore) name bound so far in this class
    # body.  Must stay BELOW the flag definitions: locals() only sees names that
    # have already been assigned.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # NOTE(review): presumably the flags whose values may no longer be changed -
    # confirm against __config_flags
    _fixed_names = """
        collect_all_And_tokens
        """.split()

104 

105 

class __diag__(__config_flags):
    # Holder for the global diagnostic switches.  Every public attribute below
    # is one switch and starts out disabled; the matching names are mirrored in
    # the Diagnostics enum.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # Snapshot of the public names defined above.  Must stay BELOW the switch
    # definitions: locals() only sees names that have already been assigned.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # switches whose name starts with "warn"
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    # switches whose name starts with "enable_debug"
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Call ``cls.enable`` for every name in ``_warning_names``."""
        for name in cls._warning_names:
            cls.enable(name)

126 

127 

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :func:`one_of` is
      incorrectly called with multiple str arguments
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :meth:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.
    """

    # Each member's *name* is what enable_diag()/disable_diag() pass on to
    # __diag__; the integer values are only distinct identifiers.
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    # NOTE(review): this member is missing from the docstring list above; the
    # name suggests a warning about combining '<<' with '|' (MatchFirst) on a
    # Forward - confirm and document.
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

160 

161 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch on one global pyparsing diagnostic.

    :param diag_enum: the :class:`Diagnostics` member whose name identifies
        the flag to enable
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

167 

168 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch off one global pyparsing diagnostic.

    :param diag_enum: the :class:`Diagnostics` member whose name identifies
        the flag to disable
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

174 

175 

def enable_all_warnings() -> None:
    """
    Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).

    Delegates to ``__diag__.enable_all_warnings()``, which enables every flag
    whose name starts with ``warn``; ``enable_debug_...`` flags are not touched.
    """
    __diag__.enable_all_warnings()

181 

182 

183# hide abstract class 

184del __config_flags 

185 

186 

187def _should_enable_warnings( 

188 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] 

189) -> bool: 

190 enable = bool(warn_env_var) 

191 for warn_opt in cmd_line_warn_options: 

192 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( 

193 ":" 

194 )[:5] 

195 if not w_action.lower().startswith("i") and ( 

196 not (w_message or w_category or w_module) or w_module == "pyparsing" 

197 ): 

198 enable = True 

199 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): 

200 enable = False 

201 return enable 

202 

203 

# Runs once, at import time: turn on every pyparsing warning when the
# interpreter's -W options (sys.warnoptions) and/or the
# PYPARSINGENABLEALLWARNINGS environment variable ask for it.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

208 

209 

# build list of single arg builtins, that can be used as parse actions
# (_trim_arity wraps these directly so that only the tokens are passed in)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# shape returned by parseImpl methods: (location, tokens)
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition callable may take 0, 1, 2 or 3 arguments and returns a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# callback signatures for fail actions and for the three debug hooks; the
# trailing bool of the debug hooks is the cache_hit flag
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

233 

234 

# Convenience character-set strings.
# ASCII letters, upper case then lower case
alphas: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
# identifier start / body characters, taken from the Latin1 unicode set
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
# decimal digits
nums: str = "0123456789"
# hexadecimal digits, both letter cases
hexnums: str = "0123456789ABCDEFabcdef"
# alphas followed by nums
alphanums: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
# the printable, non-whitespace ASCII characters from '!' through '~'
printables: str = (
    '!"'
    "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
)

246 

247 

248class _ParseActionIndexError(Exception): 

249 """ 

250 Internal wrapper around IndexError so that IndexErrors raised inside 

251 parse actions aren't misinterpreted as IndexErrors raised inside 

252 ParserElement parseImpl methods. 

253 """ 

254 

255 def __init__(self, msg: str, exc: BaseException) -> None: 

256 self.msg: str = msg 

257 self.exc: BaseException = exc 

258 

259 

# Both globals are filled in lazily by the first call to _trim_arity and then
# reused: the stack entry of the line inside _trim_arity that captures it, and
# the (filename, lineno) pair at which wrapper calls the user's function.
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that accepts the full argument list and forwards
    ``args[limit:]`` to ``func``.  ``limit`` starts at 0 and is increased by
    one, up to ``max_limit``, each time the call fails with a ``TypeError``
    raised at the call site itself (that is, an argument-count mismatch rather
    than an error from inside ``func``).  After the first successful call the
    discovered ``limit`` is reused without further probing.

    Members of ``_single_arg_builtins`` are not probed; they are wrapped so
    that only the third argument is passed.

    While probing, an ``IndexError`` raised by ``func`` is re-raised as
    :class:`_ParseActionIndexError`.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                # the call above is the line pa_call_line_synth points at
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    # Only the first two frames are inspected.  If the last of
                    # them is the call line inside this wrapper, func's frame
                    # was never entered, so the TypeError came from the call
                    # itself and not from func's body.
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    # drop the traceback reference promptly
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            # retry with one fewer leading argument
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                # NOTE(review): this handler only guards the probing loop; the
                # found_arity fast path at the top of wrapper calls func
                # outside the try, so later IndexErrors propagate unwrapped -
                # confirm this is intended.
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

326 

327 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a boolean predicate so it can be installed as a parse action.

    Useful where a parse action is required but
    :meth:`ParserElement.add_condition` cannot be used (such as when adding a
    condition to an operator level in :class:`infix_notation`).

    :param fn: predicate taking 0-3 of the usual ``(s, loc, toks)`` arguments
        and returning a truthy value when the condition holds
    :param message: text for the raised exception; when ``None`` the text
        ``"failed user-defined condition"`` is used
    :param fatal: if ``True``, a failed condition raises
        :class:`ParseFatalException` to stop parsing immediately;
        otherwise it raises :class:`ParseException`
    :return: a three-argument parse action that returns ``None`` when the
        predicate passes and raises when it fails
    """
    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException

    failure_text = "failed user-defined condition" if message is None else message

    # normalise the predicate so it can always be called as fn(s, l, t)
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return
        raise failure_type(s, l, failure_text)

    return pa

355 

356 

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "about to try a match" debug hook.

    Prints three lines: the expression with the location (plus line and
    column numbers), the text of the current line, and a caret placed under
    the current column.  A leading ``*`` marks a cache hit.
    """
    marker = "*" if cache_hit else ""
    column = col(loc, instring)
    header = f"{marker}Match {expr} at loc {loc}({lineno(loc, instring)},{column})\n"
    source_line = f" {line(loc, instring)}\n"
    pointer = f" {'^':>{column}}"
    print(header + source_line + pointer)

368 

369 

370def _default_success_debug_action( 

371 instring: str, 

372 startloc: int, 

373 endloc: int, 

374 expr: ParserElement, 

375 toks: ParseResults, 

376 cache_hit: bool = False, 

377): 

378 cache_hit_str = "*" if cache_hit else "" 

379 print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}") 

380 

381 

382def _default_exception_debug_action( 

383 instring: str, 

384 loc: int, 

385 expr: ParserElement, 

386 exc: Exception, 

387 cache_hit: bool = False, 

388): 

389 cache_hit_str = "*" if cache_hit else "" 

390 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}") 

391 

392 

def null_debug_action(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them, and returns ``None``.
    """

395 

396 

397class ParserElement(ABC): 

398 """Abstract base level parser element class.""" 

399 

400 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

401 verbose_stacktrace: bool = False 

402 _literalStringClass: type = None # type: ignore[assignment] 

403 

404 @staticmethod 

405 def set_default_whitespace_chars(chars: str) -> None: 

406 r""" 

407 Overrides the default whitespace chars 

408 

409 Example: 

410 

411 .. doctest:: 

412 

413 # default whitespace chars are space, <TAB> and newline 

414 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl") 

415 ParseResults(['abc', 'def', 'ghi', 'jkl'], {}) 

416 

417 # change to just treat newline as significant 

418 >>> ParserElement.set_default_whitespace_chars(" \t") 

419 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl") 

420 ParseResults(['abc', 'def'], {}) 

421 

422 # Reset to default 

423 >>> ParserElement.set_default_whitespace_chars(" \n\t\r") 

424 """ 

425 ParserElement.DEFAULT_WHITE_CHARS = chars 

426 

427 # update whitespace all parse expressions defined in this module 

428 for expr in _builtin_exprs: 

429 if expr.copyDefaultWhiteChars: 

430 expr.whiteChars = set(chars) 

431 

    @staticmethod
    def inline_literals_using(cls: type) -> None:
        """
        Set class to be used for inclusion of string literals into a parser.

        The class is stored in ``ParserElement._literalStringClass``, so the
        setting is global.  Note that this is a static method: ``cls`` is an
        ordinary argument supplied by the caller, not the receiving class.

        :param cls: the class to store as the literal-string class

        Example:

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

           # default literal class used is Literal
           >>> integer = Word(nums)
           >>> date_str = (
           ...     integer("year") + '/'
           ...     + integer("month") + '/'
           ...     + integer("day")
           ... )

           >>> date_str.parse_string("1999/12/31")
           ParseResults(['1999', '/', '12', '/', '31'],
           {'year': '1999', 'month': '12', 'day': '31'})

           # change to Suppress
           >>> ParserElement.inline_literals_using(Suppress)
           >>> date_str = (
           ...     integer("year") + '/'
           ...     + integer("month") + '/'
           ...     + integer("day")
           ... )

           >>> date_str.parse_string("1999/12/31")
           ParseResults(['1999', '12', '31'],
           {'year': '1999', 'month': '12', 'day': '31'})

           # Reset
           >>> ParserElement.inline_literals_using(Literal)
        """
        ParserElement._literalStringClass = cls

470 

471 @classmethod 

472 def using_each(cls, seq, **class_kwargs): 

473 """ 

474 Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq. 

475 

476 Example: 

477 

478 .. testcode:: 

479 

480 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") 

481 

482 .. versionadded:: 3.1.0 

483 """ 

484 yield from (cls(obj, **class_kwargs) for obj in seq) 

485 

    class DebugActions(NamedTuple):
        # The three optional debug callbacks of an expression, in fixed order.
        # A field set to None means no custom callback is installed for that
        # event.
        debug_try: typing.Optional[DebugStartAction]  # before a match attempt
        debug_match: typing.Optional[DebugSuccessAction]  # after a successful match
        debug_fail: typing.Optional[DebugExceptionAction]  # after a failed match

490 

491 def __init__(self, savelist: bool = False) -> None: 

492 self.parseAction: list[ParseAction] = list() 

493 self.failAction: typing.Optional[ParseFailAction] = None 

494 self.customName: str = None # type: ignore[assignment] 

495 self._defaultName: typing.Optional[str] = None 

496 self.resultsName: str = None # type: ignore[assignment] 

497 self.saveAsList: bool = savelist 

498 self.skipWhitespace: bool = True 

499 self.whiteChars: set[str] = set(ParserElement.DEFAULT_WHITE_CHARS) 

500 self.copyDefaultWhiteChars: bool = True 

501 # used when checking for left-recursion 

502 self._may_return_empty: bool = False 

503 self.keepTabs: bool = False 

504 self.ignoreExprs: list[ParserElement] = list() 

505 self.debug: bool = False 

506 self.streamlined: bool = False 

507 # optimize exception handling for subclasses that don't advance parse index 

508 self.mayIndexError: bool = True 

509 self.errmsg: Union[str, None] = "" 

510 # mark results names as modal (report only last) or cumulative (list all) 

511 self.modalResults: bool = True 

512 # custom debug actions 

513 self.debugActions = self.DebugActions(None, None, None) 

514 # avoid redundant calls to preParse 

515 self.callPreparse: bool = True 

516 self.callDuringTry: bool = False 

517 self.suppress_warnings_: list[Diagnostics] = [] 

518 self.show_in_diagram: bool = True 

519 

    @property
    def mayReturnEmpty(self) -> bool:
        """
        Read-through alias for ``_may_return_empty``.

        .. deprecated:: 3.3.0
           use _may_return_empty instead.
        """
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value) -> None:
        """
        Write-through alias for ``_may_return_empty``.

        .. deprecated:: 3.3.0
           use _may_return_empty instead.
        """
        self._may_return_empty = value

535 

    def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
        """
        Suppress warnings emitted for a particular diagnostic on this expression.

        The diagnostic is appended to this expression's ``suppress_warnings_``
        list; the expression itself is returned so calls can be chained.

        :param warning_type: the :class:`Diagnostics` member to suppress
        :return: ``self``

        Example:

        .. doctest::

           >>> label = pp.Word(pp.alphas)

           # Normally using an empty Forward in a grammar
           # would print a warning, but we can suppress that
           >>> base = pp.Forward().suppress_warning(
           ...     pp.Diagnostics.warn_on_parse_using_empty_Forward)

           >>> grammar = base | label
           >>> print(grammar.parse_string("x"))
           ['x']
        """
        self.suppress_warnings_.append(warning_type)
        return self

557 

558 def visit_all(self): 

559 """General-purpose method to yield all expressions and sub-expressions 

560 in a grammar. Typically just for internal use. 

561 """ 

562 to_visit = deque([self]) 

563 seen = set() 

564 while to_visit: 

565 cur = to_visit.popleft() 

566 

567 # guard against looping forever through recursive grammars 

568 if cur in seen: 

569 continue 

570 seen.add(cur) 

571 

572 to_visit.extend(cur.recurse()) 

573 yield cur 

574 

575 def copy(self) -> ParserElement: 

576 """ 

577 Make a copy of this :class:`ParserElement`. Useful for defining 

578 different parse actions for the same parsing pattern, using copies of 

579 the original parse element. 

580 

581 Example: 

582 

583 .. testcode:: 

584 

585 integer = Word(nums).set_parse_action( 

586 lambda toks: int(toks[0])) 

587 integerK = integer.copy().add_parse_action( 

588 lambda toks: toks[0] * 1024) + Suppress("K") 

589 integerM = integer.copy().add_parse_action( 

590 lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

591 

592 print( 

593 (integerK | integerM | integer)[1, ...].parse_string( 

594 "5K 100 640K 256M") 

595 ) 

596 

597 prints: 

598 

599 .. testoutput:: 

600 

601 [5120, 100, 655360, 268435456] 

602 

603 Equivalent form of ``expr.copy()`` is just ``expr()``: 

604 

605 .. testcode:: 

606 

607 integerM = integer().add_parse_action( 

608 lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

609 """ 

610 cpy = copy.copy(self) 

611 cpy.parseAction = self.parseAction[:] 

612 cpy.ignoreExprs = self.ignoreExprs[:] 

613 if self.copyDefaultWhiteChars: 

614 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 

615 return cpy 

616 

617 def set_results_name( 

618 self, name: str, list_all_matches: bool = False, **kwargs 

619 ) -> ParserElement: 

620 """ 

621 Define name for referencing matching tokens as a nested attribute 

622 of the returned parse results. 

623 

624 Normally, results names are assigned as you would assign keys in a dict: 

625 any existing value is overwritten by later values. If it is necessary to 

626 keep all values captured for a particular results name, call ``set_results_name`` 

627 with ``list_all_matches`` = True. 

628 

629 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; 

630 this is so that the client can define a basic element, such as an 

631 integer, and reference it in multiple places with different names. 

632 

633 You can also set results names using the abbreviated syntax, 

634 ``expr("name")`` in place of ``expr.set_results_name("name")`` 

635 - see :meth:`__call__`. If ``list_all_matches`` is required, use 

636 ``expr("name*")``. 

637 

638 Example: 

639 

640 .. testcode:: 

641 

642 integer = Word(nums) 

643 date_str = (integer.set_results_name("year") + '/' 

644 + integer.set_results_name("month") + '/' 

645 + integer.set_results_name("day")) 

646 

647 # equivalent form: 

648 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

649 """ 

650 listAllMatches: bool = deprecate_argument(kwargs, "listAllMatches", False) 

651 

652 list_all_matches = listAllMatches or list_all_matches 

653 return self._setResultsName(name, list_all_matches) 

654 

655 def _setResultsName(self, name, list_all_matches=False) -> ParserElement: 

656 if name is None: 

657 return self 

658 newself = self.copy() 

659 if name.endswith("*"): 

660 name = name[:-1] 

661 list_all_matches = True 

662 newself.resultsName = name 

663 newself.modalResults = not list_all_matches 

664 return newself 

665 

666 def set_break(self, break_flag: bool = True) -> ParserElement: 

667 """ 

668 Method to invoke the Python pdb debugger when this element is 

669 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to 

670 disable. 

671 """ 

672 if break_flag: 

673 _parseMethod = self._parse 

674 

675 def breaker(instring, loc, do_actions=True, callPreParse=True): 

676 # this call to breakpoint() is intentional, not a checkin error 

677 breakpoint() 

678 return _parseMethod(instring, loc, do_actions, callPreParse) 

679 

680 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined] 

681 self._parse = breaker # type: ignore [method-assign] 

682 elif hasattr(self._parse, "_originalParseMethod"): 

683 self._parse = self._parse._originalParseMethod # type: ignore [method-assign] 

684 return self 

685 

686 def set_parse_action( 

687 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any 

688 ) -> ParserElement: 

689 """ 

690 Define one or more actions to perform when successfully matching parse element definition. 

691 

692 Parse actions can be called to perform data conversions, do extra validation, 

693 update external data structures, or enhance or replace the parsed tokens. 

694 Each parse action ``fn`` is a callable method with 0-3 arguments, called as 

695 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: 

696 

697 - ``s`` = the original string being parsed (see note below) 

698 - ``loc`` = the location of the matching substring 

699 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object 

700 

701 The parsed tokens are passed to the parse action as ParseResults. They can be 

702 modified in place using list-style append, extend, and pop operations to update 

703 the parsed list elements; and with dictionary-style item set and del operations 

704 to add, update, or remove any named results. If the tokens are modified in place, 

705 it is not necessary to return them with a return statement. 

706 

707 Parse actions can also completely replace the given tokens, with another ``ParseResults`` 

708 object, or with some entirely different object (common for parse actions that perform data 

709 conversions). A convenient way to build a new parse result is to define the values 

710 using a dict, and then create the return value using :class:`ParseResults.from_dict`. 

711 

712 If None is passed as the ``fn`` parse action, all previously added parse actions for this 

713 expression are cleared. 

714 

715 Optional keyword arguments: 

716 

717 :param call_during_try: (default= ``False``) indicate if parse action 

718 should be run during lookaheads and alternate 

719 testing. For parse actions that have side 

720 effects, it is important to only call the parse 

721 action once it is determined that it is being 

722 called as part of a successful parse. 

723 For parse actions that perform additional 

724 validation, then ``call_during_try`` should 

725 be passed as True, so that the validation code 

726 is included in the preliminary "try" parses. 

727 

728 .. Note:: 

729 The default parsing behavior is to expand tabs in the input string 

730 before starting the parsing process. 

731 See :meth:`parse_string` for more information on parsing strings 

732 containing ``<TAB>`` s, and suggested methods to maintain a 

733 consistent view of the parsed string, the parse location, and 

734 line and column positions within the parsed string. 

735 

736 Example: Parse dates in the form ``YYYY/MM/DD`` 

737 ----------------------------------------------- 

738 

739 Setup code: 

740 

741 .. testcode:: 

742 

743 def convert_to_int(toks): 

744 '''a parse action to convert toks from str to int 

745 at parse time''' 

746 return int(toks[0]) 

747 

748 def is_valid_date(instring, loc, toks): 

749 '''a parse action to verify that the date is a valid date''' 

750 from datetime import date 

751 year, month, day = toks[::2] 

752 try: 

753 date(year, month, day) 

754 except ValueError: 

755 raise ParseException(instring, loc, "invalid date given") 

756 

757 integer = Word(nums) 

758 date_str = integer + '/' + integer + '/' + integer 

759 

760 # add parse actions 

761 integer.set_parse_action(convert_to_int) 

762 date_str.set_parse_action(is_valid_date) 

763 

764 Successful parse - note that integer fields are converted to ints: 

765 

766 .. testcode:: 

767 

768 print(date_str.parse_string("1999/12/31")) 

769 

770 prints: 

771 

772 .. testoutput:: 

773 

774 [1999, '/', 12, '/', 31] 

775 

776 Failure - invalid date: 

777 

778 .. testcode:: 

779 

780 date_str.parse_string("1999/13/31") 

781 

782 prints: 

783 

784 .. testoutput:: 

785 

786 Traceback (most recent call last): 

787 ParseException: invalid date given, found '1999' ... 

788 """ 

789 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False) 

790 

791 if list(fns) == [None]: 

792 self.parseAction.clear() 

793 return self 

794 

795 if not all(callable(fn) for fn in fns): 

796 raise TypeError("parse actions must be callable") 

797 self.parseAction[:] = [_trim_arity(fn) for fn in fns] 

798 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry 

799 

800 return self 

801 

802 def add_parse_action( 

803 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any 

804 ) -> ParserElement: 

805 """ 

806 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. 

807 

808 See examples in :class:`copy`. 

809 """ 

810 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False) 

811 

812 self.parseAction += [_trim_arity(fn) for fn in fns] 

813 self.callDuringTry = self.callDuringTry or callDuringTry or call_during_try 

814 return self 

815 

816 def add_condition( 

817 self, *fns: ParseCondition, call_during_try: bool = False, **kwargs: Any 

818 ) -> ParserElement: 

819 """Add a boolean predicate function to expression's list of parse actions. See 

820 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``, 

821 functions passed to ``add_condition`` need to return boolean success/fail of the condition. 

822 

823 Optional keyword arguments: 

824 

825 - ``message`` = define a custom message to be used in the raised exception 

826 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise 

827 ParseException 

828 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls, 

829 default=False 

830 

831 Example: 

832 

833 .. doctest:: 

834 :options: +NORMALIZE_WHITESPACE 

835 

836 >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) 

837 >>> year_int = integer.copy().add_condition( 

838 ... lambda toks: toks[0] >= 2000, 

839 ... message="Only support years 2000 and later") 

840 >>> date_str = year_int + '/' + integer + '/' + integer 

841 

842 >>> result = date_str.parse_string("1999/12/31") 

843 Traceback (most recent call last): 

844 ParseException: Only support years 2000 and later... 

845 """ 

846 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False) 

847 

848 for fn in fns: 

849 self.parseAction.append( 

850 condition_as_parse_action( 

851 fn, 

852 message=str(kwargs.get("message")), 

853 fatal=bool(kwargs.get("fatal", False)), 

854 ) 

855 ) 

856 

857 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry 

858 return self 

859 

    def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
        """
        Define action to perform if parsing fails at this expression.
        Fail action fn is a callable function that takes the arguments
        ``fn(s, loc, expr, err)`` where:

        - ``s`` = string being parsed
        - ``loc`` = location where expression match was attempted and failed
        - ``expr`` = the parse expression that failed
        - ``err`` = the exception thrown

        The function returns no value.  It may throw :class:`ParseFatalException`
        if it is desired to stop parsing immediately.

        Any previously set fail action is replaced.  Returns ``self`` so calls
        can be chained."""
        self.failAction = fn
        return self

875 

876 def _skipIgnorables(self, instring: str, loc: int) -> int: 

877 if not self.ignoreExprs: 

878 return loc 

879 exprsFound = True 

880 ignore_expr_fns = [e._parse for e in self.ignoreExprs] 

881 last_loc = loc 

882 while exprsFound: 

883 exprsFound = False 

884 for ignore_fn in ignore_expr_fns: 

885 try: 

886 while 1: 

887 loc, dummy = ignore_fn(instring, loc) 

888 exprsFound = True 

889 except ParseException: 

890 pass 

891 # check if all ignore exprs matched but didn't actually advance the parse location 

892 if loc == last_loc: 

893 break 

894 last_loc = loc 

895 return loc 

896 

def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should actually begin: ``loc``
    advanced past any ignorable expressions and then, if this element skips
    whitespace, past any characters found in ``self.whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end:
        if instring[loc] not in skippable:
            break
        loc += 1
    return loc

908 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """
    Base matching hook: consumes no input and produces no tokens, returning
    ``loc`` unchanged together with an empty token list.
    """
    return loc, []

911 

def postParse(self, instring, loc, tokenlist):
    """
    Hook applied to the tokens returned by ``parseImpl`` before they are
    wrapped in a ``ParseResults``; this base version returns ``tokenlist``
    unchanged.
    """
    return tokenlist

914 

915 # @profile 

def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Match this expression against ``instring`` at ``loc`` without consulting
    the packrat cache.

    Steps, in order: optional ``preParse``, ``parseImpl``, ``postParse``,
    wrapping of the tokens in a ``ParseResults``, and then any parse actions
    (run when ``do_actions`` is true or ``self.callDuringTry`` is set).

    When ``self.debug`` is set or a fail action is installed, the debug
    try/fail/match callbacks and the fail action are invoked around the match.

    Returns ``(end_location, results)``.

    Raises ``ParseException`` if ``parseImpl`` raises ``IndexError`` on the
    guarded path, or if a parse action raises ``IndexError``; any other
    exception from the match is re-raised after the failure callbacks run.
    """
    debugging = self.debug
    len_instring = len(instring)

    if debugging or self.failAction:
        # Bind tokens_start before the try: if preParse itself raises, the
        # failure callbacks below still need a location to report, and an
        # unbound name here would mask the real error with UnboundLocalError.
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                # parseImpl may index past the end of the string; convert
                # that into a regular parse failure at end of input
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # fast path: no debug callbacks and no fail action to notify
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, aslist=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns something new replaces
                    # the current results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            aslist=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # report failures raised by user parse actions
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        aslist=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

1016 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the location where the match
    ended.

    A ``ParseFatalException`` is re-raised as-is when ``raise_fatal`` is true;
    otherwise it is replaced by a ``ParseException`` reported at ``loc`` with
    this element's error message.
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
        return outcome[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise

1031 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``; a ``ParseException``
    or ``IndexError`` from the attempt counts as "no match".
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        matched = False
    else:
        matched = True
    return matched

1039 

# cache for left-recursion in Forward references
# recursion_lock: re-entrant lock stored alongside the memo table.
# recursion_memos: starts as a plain dict; enable_left_recursion() replaces it
# with an unbounded or LRU memo, and reset_cache() clears it.
# Keys are (int, Forward, bool) and values are (int, ParseResults-or-Exception)
# per the annotation - presumably (loc, forward, do_actions) -> (end loc,
# outcome); confirm against Forward.parseImpl.
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}

1045 

class _CacheType(typing.Protocol):
    """
    Class to be used for packrat and left-recursion caching of results
    and exceptions.

    Structural interface only: any object providing ``not_in_cache``,
    ``get``, ``set`` and ``clear`` satisfies it.
    """

    # sentinel that get() returns to signal a cache miss (compared with `is`)
    not_in_cache: bool

    def get(self, *args) -> typing.Any: ...

    def set(self, *args) -> None: ...

    def clear(self) -> None: ...

1059 

class NullCache(dict):
    """
    A null cache type for initialization of the packrat_cache class variable.
    If/when enable_packrat() is called, this null cache will be replaced by a
    proper _CacheType class instance.

    All three methods have empty (``...``) bodies, so nothing is ever stored
    and ``get`` always returns ``None``.
    """

    not_in_cache: bool = True

    # no-op: always returns None
    def get(self, *args) -> typing.Any: ...

    # no-op: the value is discarded
    def set(self, *args) -> None: ...

    # no-op: overrides dict.clear
    def clear(self) -> None: ...

1074 

# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
# packrat_cache: placeholder NullCache until enable_packrat() installs a real cache.
# packrat_cache_lock: held by _parseCache for the whole lookup/parse/store sequence.
# packrat_cache_stats: [hits, misses], indexed by HIT=0 / MISS=1 in _parseCache.
packrat_cache: _CacheType = NullCache()
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]

1080 

1081 # this method gets repeatedly called during backtracking with the same arguments - 

1082 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 

def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Packrat-memoized wrapper around ``_parseNoCache``.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``.
    On a miss the expression is parsed and either the result, stored as
    ``(end_loc, results_copy, start_loc)``, or a traceback-free copy of the
    raised ``ParseBaseException`` is cached. On a hit the cached exception
    is re-raised, or a fresh copy of the cached results is returned.

    When ``self.debug`` is set, the debug callbacks are invoked on cache hits
    with ``cache_hit=True``; a ``TypeError`` from a callback (e.g. one that
    does not accept ``cache_hit``) is deliberately ignored.

    Returns ``(end_location, results)``.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store a copy so later mutation of the returned results
                # cannot corrupt the cached entry
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(tuple[int, ParseResults, int], value)
            # cached layout is (end location, results, start location)
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # same (start, end) argument order as the uncached path
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result

1130 

# Active parse entry point. Defaults to the uncached implementation;
# enable_packrat() rebinds it to _parseCache and disable_memoization()
# restores _parseNoCache.
_parse = _parseNoCache

1132 

@staticmethod
def reset_cache() -> None:
    """
    Empty the caches used by packrat and left-recursion parsing, and zero
    the packrat hit/miss counters in place.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        stats = ParserElement.packrat_cache_stats
        # slice-assign so existing references to the stats list stay valid
        stats[:] = [0 for _ in stats]
        ParserElement.recursion_memos.clear()

1144 

# class attributes to keep caching status
# Set by enable_packrat() / enable_left_recursion() and cleared by
# disable_memoization(); each enable_* call checks the other flag to reject
# conflicting modes.
_packratEnabled = False
_left_recursion_enabled = False

1148 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing and discard their memoized data.

    Calling this when neither mode is active is harmless, so it can be used
    before enabling Packrat or Left Recursion to clear any previous settings.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.reset_cache()
        # back to the plain, uncached parse path
        ParserElement._parse = ParserElement._parseNoCache
        ParserElement._packratEnabled = False
        ParserElement._left_recursion_enabled = False

1163 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, left-recursive :class:`Forward`
    elements are matched repeatedly with a fixed recursion depth that is
    increased step by step until the longest match is found.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    Recursion search naturally memoizes matches of ``Forward`` elements and may
    thus skip reevaluation of parse actions during backtracking. This may break
    programs with parse actions which rely on strict ordering of side-effects.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Raises ``RuntimeError`` if packrat parsing is already enabled and
    ``force`` is not set, and ``NotImplementedError`` for a
    ``cache_size_limit`` that is not positive.

    Bounded Recursion parsing works similar but not identical to Packrat parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        # choose the memo implementation first, then install it
        if cache_size_limit is None:
            memo = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True

1221 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (common in complex
    grammars) can then return a cached value immediately instead of
    re-executing parsing/validating code. Both successful results and
    parsing exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects. For this reason, packrat parsing is disabled when
    you first import pyparsing. To activate the packrat feature, your
    program must call the class method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    If packrat parsing is already enabled, the call returns without
    changing the existing cache.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similar but not identical to Bounded Recursion parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if not ParserElement._packratEnabled:
            ParserElement._packratEnabled = True
            ParserElement.packrat_cache = (
                _UnboundedCache()
                if cache_size_limit is None
                else _FifoCache(cache_size_limit)
            )
            # route all parsing through the memoizing entry point
            ParserElement._parse = ParserElement._parseCache

1273 

def parse_string(
    self, instring: str, parse_all: bool = False, **kwargs
) -> ParseResults:
    """
    Parse ``instring`` from its beginning according to this parser
    definition. This is intended as the primary entry point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the whole input is consumed, ``parse_all`` must be set to ``True``. This is also
    equivalent to ending the grammar with :class:`StringEnd`\\ ().

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    # always consume the legacy keyword, even when parse_all is already set
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    parse_all = parse_all or legacy_parse_all

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if parse_all:
            # skip trailing ignorables/whitespace, then insist on end of input
            loc = self.preParse(instring, loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

    return tokens

1349 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    **kwargs,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    If ``always_skip_whitespace`` is true (the default), locations are
    pre-scanned with a scratch ``Empty()`` element that borrows this
    expression's ignore expressions and whitespace characters; otherwise this
    expression's own ``preParse`` is used. If ``debug`` is true, each match
    is printed as a dict with ``tokens``, ``start`` and ``end`` keys before it
    is yielded.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)

    # honor whichever of the new and legacy limits is smaller
    max_matches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        # `<=` so that a match attempt is also made at the very end of the string
        while loc <= instrlen and matches < max_matches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character past the pre-scanned location
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): `nextloc` (pre-scan result) and `nextLoc`
                        # (end of match) differ only by case; when the pre-scan
                        # advances, loc jumps to the END of the match - confirm
                        # this is the intended overlap behavior.
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not end beyond loc: step forward to guarantee progress
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1453 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matched text using the
    tokens returned from parse actions. To use ``transform_string``, define a
    grammar and attach a parse action to it that modifies the returned token
    list. Calling ``transform_string()`` on a target string scans for matches
    and replaces each matched span with its (possibly modified) tokens; text
    between matches is copied through unchanged. The transformed string is
    returned.

    Note that this sets ``keepTabs`` to ``True`` on this expression.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
        made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
        Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy through the untouched text preceding this match
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces.extend(toks.as_list())
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        # trailing text after the last match
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(non_empty))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1512 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, collecting just the tokens of
    every match of this expression into a single :class:`ParseResults`.
    May be called with optional ``max_matches`` argument, to clip searching
    after 'n' matches are found.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    legacy_limit: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    limit = min(legacy_limit, max_matches)

    try:
        found = self.scan_string(
            instring,
            max_matches=limit,
            always_skip_whitespace=False,
            debug=debug,
        )
        # keep only the tokens; start/end locations are discarded
        return ParseResults([toks for toks, _start, _end in found])
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1571 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Generator that splits ``instring``, treating each match of this
    expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``include_separators`` argument (default= ``False``), if the separating
    matching text should be included in the split results.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    legacy_flag: bool = deprecate_argument(kwargs, "includeSeparators", False)
    emit_separators = legacy_flag or include_separators

    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        # text between the previous separator and this one
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever remains after the final separator
    yield instring[cursor:]

1609 

def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. Strings added to a
    :class:`ParserElement` are converted to :class:`Literal`\\ s by default.

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1653 

def __radd__(self, other) -> ParserElement:
    """
    Reflected ``+``: used when the left operand is not a :class:`ParserElement`.

    ``... + expr`` yields a :class:`SkipTo` up to ``expr`` (named
    ``_skipped*``) followed by ``expr``; a string operand is first wrapped
    with the literal string class.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1666 

def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator: like ``+``, but inserts an error stop
    between the operands, returning an :class:`And`.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1676 

def __rsub__(self, other) -> ParserElement:
    """
    Reflected ``-``: used when the left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1686 

def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``
    """
    # Translate the Ellipsis spellings into their tuple equivalents.
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = (0, *other[1:2], None)[:2]

    if isinstance(other, int):
        required, optional = other, 0
    elif isinstance(other, tuple):
        # Map any remaining Ellipsis to None, then pad/truncate to two entries.
        low, high = (*(None if o is Ellipsis else o for o in other), None, None)[:2]
        if low is None:
            low = 0
        if not isinstance(low, int):
            return NotImplemented
        if high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        if not isinstance(high, int):
            return NotImplemented
        required, optional = low, high - low
    else:
        return NotImplemented

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == 0 and optional == 0:
        return And([])

    if not optional:
        return self if required == 1 else And([self] * required)

    def nested_optionals(depth):
        # Opt(self + Opt(self + ... Opt(self))), `depth` levels deep
        if depth <= 1:
            return Opt(self)
        return Opt(self + nested_optionals(depth - 1))

    if not required:
        return nested_optionals(optional)
    if required == 1:
        return self + nested_optionals(optional)
    return And([self] * required) + nested_optionals(optional)

1766 

def __rmul__(self, other) -> ParserElement:
    """
    Reflected ``*``: ``n * expr`` is handled exactly like ``expr * n`` by
    delegating to ``__mul__``.
    """
    return self.__mul__(other)

1769 

def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    ``expr | ...`` returns a pending skip that must skip, and string operands
    are wrapped with the literal string class.

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        if other == "":
            # `expr | ""` is equivalent to `Opt(expr)`
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented

1788 

def __ror__(self, other) -> ParserElement:
    """
    Reflected ``|``: used when the left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1798 

def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`; a string operand
    is first wrapped with the literal string class.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1808 

def __rxor__(self, other) -> ParserElement:
    """
    Reflected ``^``: used when the left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1818 

def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`

    A string operand is converted with ``self._literalStringClass``;
    any other non-parser operand yields ``NotImplemented``.
    """
    right = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(right, ParserElement):
        return Each([self, right])
    return NotImplemented

1828 

def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`

    A string left operand is converted with ``self._literalStringClass``;
    any other non-parser operand yields ``NotImplemented``.
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left & self
    return NotImplemented

1838 

def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny` wrapping this expression
    """
    negated = NotAny(self)
    return negated

1844 

# Disable iteration explicitly. Because this class defines __getitem__ (used
# for repetition notation), Python would otherwise fall back to the legacy
# sequence protocol and call __getitem__ with 0, 1, 2, ... when asked to
# iterate over an expression. Setting __iter__ to None makes iter(expr)
# raise TypeError instead.
__iter__ = None

1848 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    .. versionchanged:: 3.1.0
       Support for slice notation.
    """

    stop_on_defined = False
    # placeholder only: every branch below that sets stop_on_defined also
    # rebinds stop_on, so this value is never passed to stopOn()
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[spec: end_expr] - slice start is the repetition spec,
        # slice stop is the stop-on expression
        key, stop_on = key.start, key.stop
        if key is None:
            # expr[: end_expr] is treated like expr[...: end_expr]
            key = ...
        stop_on_defined = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[m, n: end_expr] arrives as the tuple (m, slice(n, end_expr))
        key, stop_on = (key[0], key[1].start), key[1].stop
        stop_on_defined = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # a non-iterable key (such as an int or ``...``) is duplicated into a pair
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    ret = self * tuple(key[:2])
    # typing.cast has no runtime effect; it only tells type checkers that
    # ``ret`` is expected to offer stopOn()
    ret = typing.cast(_MultipleMatch, ret)

    if stop_on_defined:
        ret.stopOn(stop_on)

    return ret

1910 

def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :meth:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted (or ``None``), this is the same as calling :meth:`copy`.

    Example:

    .. testcode::

       # these are equivalent
       userdata = (
           Word(alphas).set_results_name("name")
           + Word(nums + "-").set_results_name("socsecno")
       )

       userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    return self.copy() if name is None else self._setResultsName(name)

1936 

def suppress(self) -> ParserElement:
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that its
    output is suppressed; useful to keep punctuation from cluttering up
    returned output.
    """
    suppressed = Suppress(self)
    return suppressed

1943 

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping
       in child elements (if any); this implementation does not use the flag
       and only sets ``skipWhitespace`` on this element

    Returns ``self`` so calls can be chained.
    """
    setattr(self, "skipWhitespace", True)
    return self

1953 

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping
       in child elements (if any); this implementation does not use the flag
       and only clears ``skipWhitespace`` on this element

    Returns ``self`` so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self

1964 

def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars for this expression.

    Whitespace skipping is switched on, ``chars`` is stored as a set in
    ``whiteChars``, and ``copy_defaults`` is stored in ``copyDefaultWhiteChars``.
    Returns ``self`` so calls can be chained.
    """
    self.skipWhitespace = True
    whitespace = set(chars)
    self.whiteChars = whitespace
    self.copyDefaultWhiteChars = copy_defaults
    return self

1975 

def parse_with_tabs(self) -> ParserElement:
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before parsing
    the input string, by setting ``keepTabs`` on this expression.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    Returns ``self`` so calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self

1984 

def ignore(self, other: ParserElement) -> ParserElement:
    """
    Register an expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in :class:`Suppress`.  A :class:`Suppress`
    expression is appended to ``ignoreExprs`` only if it is not already
    present; any other expression is copied, wrapped in :class:`Suppress`,
    and appended.  Returns ``self``.

    Example:

    .. doctest::

       >>> patt = Word(alphas)[...]
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj']

       >>> patt = Word(alphas)[...].ignore(c_style_comment)
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self

2012 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching.
    Any action passed as a false value is replaced by the corresponding
    default debug action.

    :param start_action: method to be called when an expression is about to be parsed;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
       should have the signature::

          fn(input_string: str,
             start_location: int,
             end_location: int,
             expression: ParserElement,
             parsed_tokens: ParseResults,
             cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             exception: Exception,
             cache_hit: bool)
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]

    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = any(self.debugActions)
    return self

2056 

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to apply the flag to every expression yielded
    by ``visit_all()``.

    Example:

    .. testcode::

       wd = Word(alphas).set_name("alphaword")
       integer = Word(nums).set_name("numword")
       term = wd | integer

       # turn on debugging for wd
       wd.set_debug()

       term[1, ...].parse_string("abc 123 xyz 890")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE

       Match alphaword at loc 0(1,1)
         abc 123 xyz 890
         ^
       Matched alphaword -> ['abc']
       Match alphaword at loc 4(1,5)
         abc 123 xyz 890
             ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
       Match alphaword at loc 8(1,9)
         abc 123 xyz 890
                 ^
       Matched alphaword -> ['xyz']
       Match alphaword at loc 12(1,13)
         abc 123 xyz 890
                     ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
       abc 123 xyz 890
                      ^
       Match alphaword failed, ParseException raised: Expected alphaword, found end of text ...

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :meth:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :meth:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if not recurse:
        if not flag:
            self.debug = False
        else:
            # enabling debug installs the default debug actions
            self.set_debug_actions(
                _default_start_debug_action,
                _default_success_debug_action,
                _default_exception_debug_action,
            )
        return self

    # recursive case: apply the flag to each visited expression individually
    for sub_expr in self.visit_all():
        sub_expr.set_debug(flag, recurse=False)
    return self

2126 

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression; built by ``_generateDefaultName()``
    on first access and cached in ``_defaultName``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

2132 

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Build the string used as ``default_name`` for this expression.

    Abstract: every child class must provide its own implementation.
    """

2138 

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Define name for this expression, makes debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example:

    .. doctest::

       >>> integer = Word(nums)
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

       >>> integer.set_name("integer")
       integer
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]

    # the error message is rebuilt from the (possibly new) display name
    display_name = str(self)
    self.errmsg = f"Expected {display_name}"

    if __diag__.enable_debug_on_named_expressions:
        debug_on = name is not None
        self.set_debug(debug_on)

    return self

2173 

@property
def name(self) -> str:
    """
    The user-defined name if one has been set, otherwise the auto-generated ``default_name``
    """
    custom = self.customName
    return self.default_name if custom is None else custom

@name.setter
def name(self, new_name) -> None:
    # assigning to ``name`` goes through set_name()
    self.set_name(new_name)

2184 

def __str__(self) -> str:
    """Return the expression's ``name``."""
    display_name = self.name
    return display_name

2187 

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text

2190 

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default name
    so that it is regenerated on next access.  Returns ``self``.
    """
    self.streamlined, self._defaultName = True, None
    return self

2195 

def recurse(self) -> list[ParserElement]:
    """Return the sub-expressions of this element; this implementation has none."""
    children: list[ParserElement] = []
    return children

2198 

def _checkRecursion(self, parseElementList):
    """
    Run ``_checkRecursion`` on every element returned by ``recurse()``,
    passing along a copy of ``parseElementList`` extended with this element.
    """
    visited = parseElementList[:] + [self]
    for child in self.recurse():
        child._checkRecursion(visited)

2203 

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Emits a :class:`PyparsingDeprecationWarning` on every call; the
    ``validateTrace`` argument is not used.
    """
    deprecation_msg = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(deprecation_msg, PyparsingDeprecationWarning, stacklevel=2)
    self._checkRecursion([])

2218 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The legacy ``parseAll`` keyword is still accepted and is OR-ed with
    ``parse_all``.
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    parse_all = parse_all or legacy_parse_all

    try:
        # file-like objects are read directly
        stream = typing.cast(TextIO, file_or_filename)
        text = stream.read()
    except AttributeError:
        # no usable read(): treat the argument as a path to open
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as f:
            text = f.read()

    try:
        return self.parse_string(text, parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

2249 

def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object is always equal
    - a string is equal if this expression matches it in full
    - another :class:`ParserElement` is equal if both have equal ``vars()``
    - anything else is not equal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2258 

def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity

2261 

def matches(self, test_string: str, parse_all: bool = True, **kwargs) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match
    :param parse_all: flag to pass to :meth:`parse_string` when running tests

    The legacy ``parseAll`` keyword is still accepted and is AND-ed with
    ``parse_all``.  Returns ``True`` if parsing succeeds, ``False`` if a
    ``ParseBaseException`` is raised.

    Example:

    .. doctest::

       >>> expr = Word(nums)
       >>> expr.matches("100")
       True
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", True)
    require_full_match = parse_all and legacy_parse_all

    try:
        self.parse_string(str(test_string), parse_all=require_full_match)
    except ParseBaseException:
        return False
    else:
        return True

2286 

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The keyword-only camelCase arguments (``parseAll``, ``fullDump``,
    ``printResults``, ``failureTests``, ``postParse``) are legacy synonyms;
    each is combined with its snake_case counterpart before use.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Passing example:

    .. testcode::

       number_expr = pyparsing_common.number.copy()

       result = number_expr.run_tests('''
           # unsigned integer
           100
           # negative integer
           -100
           # float with scientific notation
           6.02e23
           # integer with scientific notation
           1e-12
           # negative decimal number without leading digit
           -.100
           ''')
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # unsigned integer
       100
       [100]

       # negative integer
       -100
       [-100]

       # float with scientific notation
       6.02e23
       [6.02e+23]

       # integer with scientific notation
       1e-12
       [1e-12]

       # negative decimal number without leading digit
       -.100
       [-0.1]
       Success

    Failure-test example:

    .. testcode::

       result = number_expr.run_tests('''
           # stray character
           100Z
           # too many '.'
           3.14.159
           ''', failure_tests=True)
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # stray character
       100Z
       100Z
          ^
       ParseException: Expected end of text, found 'Z' ...

       # too many '.'
       3.14.159
       3.14.159
           ^
       ParseException: Expected end of text, found '.' ...
       FAIL: Expected end of text, found '.' ...
       Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

       expr = Word(alphanums)[1,...]
       expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE
       :hide:


       this is a test\\n of strings that spans \\n 3 lines
       ['this', 'is', 'a', 'test', 'of', 'strings', 'that', 'spans', '3', 'lines']

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the legacy camelCase keywords with their snake_case counterparts;
    # only the merged values are used from here on
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # matches a literal backslash-n in a test line (outside quoted strings)
    # and replaces it with an actual newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines, and blank lines following a comment, are collected
        # and emitted ahead of the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            # use the merged flag so that a legacy ``parseAll=False`` is honored
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame after the synthesized call site is the parse action
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    # a failing callback is reported in the output, not raised
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2528 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        install_hint = "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        raise Exception(install_hint) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create/overwrite the file and write the HTML to it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2590 

# Compatibility synonyms
#
# Each pre-PEP8 camelCase name below is bound to the result of
# replaced_by_pep8(<old name>, <snake_case function>). The first group is
# additionally wrapped in staticmethod(); the second group is bound directly
# as regular class attributes.
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# plain alias: the same property object under its legacy name
defaultName = default_name
# fmt: on

2625 

2626 

class _PendingSkip(ParserElement):
    # Internal placeholder class that holds the place where '...' was added to a
    # parser element. Once another ParserElement is added to it, the placeholder
    # is replaced with an expression built around a SkipTo.
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        # the expression that the '...' was attached to
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render the anchor followed by an Empty, then show the Empty as "..."
        combined = str(self.anchor + Empty())
        return combined.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # skip ahead to `other`, collecting skipped text under "_skipped"
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched and nothing was skipped: drop the first token
            # and the "_skipped" results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match and nothing was skipped: record that the
            # anchor is missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder must be completed with a following expression before parsing
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2666 

2667 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self) -> None:
        # tokens are always initialized with savelist disabled
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name is simply the name of the concrete class
        token_class = type(self)
        return token_class.__name__

2678 

2679 

class NoMatch(Token):
    """
    A token that never matches; every parse attempt raises a
    :class:`ParseException`.
    """

    def __init__(self) -> None:
        super().__init__()
        self._may_return_empty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2693 

2694 

class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example:

    .. doctest::

       >>> Literal('abc').parse_string('abc')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('abcdef')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('ab')
       Traceback (most recent call last):
       ParseException: Expected 'abc', found 'ab' (at char 0), (line: 1, col: 1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", **kwargs):
        # Subclasses are always instantiated as themselves
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        legacy_text: str = deprecate_argument(kwargs, "matchString", "")
        text = legacy_text or match_string
        if not text:
            return super().__new__(Empty)
        if len(text) == 1:
            return super().__new__(_SingleCharLiteral)

        # Default behavior
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        new_args = (self.match,)
        return new_args

    def __init__(self, match_string: str = "", **kwargs) -> None:
        legacy_text: str = deprecate_argument(kwargs, "matchString", "")

        super().__init__()
        text = legacy_text or match_string
        self.match = text
        self.matchLen = len(text)
        # first character (or "" for an empty string), compared in parseImpl
        # before calling startswith
        self.firstMatchChar = text[:1]
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the default name is the repr of the string to match
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before doing the full startswith test
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2756 

2757 

class Empty(Literal):
    """
    A token that matches the empty string, so it succeeds at any location
    without consuming input.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # Both arguments are ignored; the literal is always the empty string.
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Succeed in place, producing no tokens.
        no_tokens: list = []
        return loc, no_tokens

2773 

2774 

class _SingleCharLiteral(Literal):
    """Literal variant whose ``parseImpl`` compares only one character."""

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Only the character at loc is compared against firstMatchChar.
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2780 

2781 

# Install Literal as ParserElement._literalStringClass now that Literal is
# defined; how ParserElement uses this attribute is outside this chunk.
ParserElement._literalStringClass = Literal

2783 

2784 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("start")
       ParseResults(['start'], {})
       >>> Keyword("start").parse_string("starting")
       Traceback (most recent call last):
       ParseException: Expected Keyword 'start', keyword was immediately
       followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("starting").debug()
       Traceback (most recent call last):
       ParseException: Expected Keyword "start", keyword was immediately
       followed by keyword character, found 'ing' ...

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = f"{alphanums}_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        **kwargs,
    ) -> None:
        """Create a keyword matcher.

        :param match_string: the keyword text; must not be empty
        :param ident_chars: characters that count as keyword characters;
            ``None`` selects ``Keyword.DEFAULT_KEYWORD_CHARS``
        :param caseless: compare upper-cased text instead of exact text
        :param kwargs: accepts the deprecated names ``matchString`` and
            ``identChars``
        :raises ValueError: if the resulting match string is empty
        """
        matchString = deprecate_argument(kwargs, "matchString", "")
        identChars = deprecate_argument(kwargs, "identChars", None)

        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done on upper-cased text throughout
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.ident_chars = set(identChars)

    @property
    def identChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use ident_chars instead.

        Property returning the characters being used as keyword characters for this expression.
        """
        return self.ident_chars

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the keyword at ``loc``, enforcing keyword boundaries.

        Returns ``(loc + matchLen, match)`` when the keyword text is found
        and the characters immediately before and after it are not keyword
        characters. Otherwise raises :class:`ParseException`; when the text
        matched but a boundary check failed, the message and error location
        point at the offending neighbor character.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        ident_chars = self.ident_chars
        match_end = loc + self.matchLen

        if self.caseless:
            if instring[loc:match_end].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in ident_chars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[match_end].upper() not in ident_chars
                    ):
                        return match_end, self.match

                    # followed by keyword char
                    # (message kept identical to the case-sensitive branch)
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = match_end
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in ident_chars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[match_end] not in ident_chars
                ):
                    return match_end, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = match_end
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2926 

2927 

class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the token returned is always spelled as the match string was
    given to the constructor, NOT as it appeared in the input text.

    Example:

    .. doctest::

       >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", **kwargs) -> None:
        matchString: str = deprecate_argument(kwargs, "matchString", "")

        defining_text = matchString or match_string
        # The parent stores the upper-cased form used for comparisons.
        super().__init__(defining_text.upper())
        # Keep the literal exactly as defined; this is what gets returned.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        candidate = instring[loc:end]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2957 

2958 

class CaselessKeyword(Keyword):
    """
    :class:`Keyword` that is always constructed with ``caseless=True``.

    Example:

    .. doctest::

       >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self, match_string: str = "", ident_chars: typing.Optional[str] = None, **kwargs
    ) -> None:
        # Deprecated spellings, when given and truthy, win over the new names.
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        legacy_ident: typing.Optional[str] = deprecate_argument(
            kwargs, "identChars", None
        )

        super().__init__(
            legacy_match or match_string,
            legacy_ident or ident_chars,
            caseless=True,
        )

2984 

2985 

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> patt = CloseMatch("ATCATCGAATGGA")
       >>> patt.parse_string("ATCATCGAAXGGA")
       ParseResults(['ATCATCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

       >>> patt.parse_string("ATCAXCGAAXGGA")
       Traceback (most recent call last):
       ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
       found 'ATCAXCGAAXGGA' (at char 0), (line:1, col:1)

       # exact match
       >>> patt.parse_string("ATCATCGAATGGA")
       ParseResults(['ATCATCGAATGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': []})

       # close match allowing up to 2 mismatches
       >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
       >>> patt.parse_string("ATCAXCGAAXGGA")
       ParseResults(['ATCAXCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        caseless=False,
        **kwargs,
    ) -> None:
        """Create a close-match expression.

        :param match_string: the reference string to compare input against
        :param max_mismatches: allowed number of differing positions; when
            ``None``, the deprecated ``maxMismatches`` keyword is used, which
            itself defaults to 1
        :param caseless: compare characters after lower-casing both sides
        """
        maxMismatches: int = deprecate_argument(kwargs, "maxMismatches", 1)

        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the input at ``loc`` with ``match_string`` position by position.

        Returns ``(end_loc, results)`` where ``results`` holds the matched
        input text plus the named results ``original`` and ``mismatches``.
        Raises :class:`ParseException` at ``loc`` when too little input
        remains or more than ``maxMismatches`` positions differ.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # a match is only possible if the input holds a full-length candidate
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches
            caseless = self.caseless

            for match_stringloc, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    # lower-case one character at a time so that positions in
                    # the input and in match_string stay aligned
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The loop covered all len(match_string) positions, so the
                # match ends at maxloc. Deriving the end from the last loop
                # index would consume one character for an empty match_string.
                results = ParseResults([instring[start:maxloc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return maxloc, results

        raise ParseException(instring, loc, self.errmsg, self)

3086 

3087 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0,
      meaning no maximum)
    - ``exact`` - exact number of characters to match (default=0,
      meaning not specified); when given, overrides ``min`` and ``max``
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

       # a word composed of digits
       integer = Word(nums)
       # Two equivalent alternate forms:
       Word("0123456789")
       Word(srange("[0-9]"))

       # a word with a leading capital, and zero or more lowercase
       capitalized_word = Word(alphas.upper(), alphas.lower())

       # hostnames are alphanumeric, with leading alpha, and '-'
       hostname = Word(alphas, alphanums + '-')

       # roman numeral
       # (not a strict parser, accepts invalid mix of characters)
       roman = Word("IVXLCDM")

       # any string of non-whitespace characters, except for ','
       csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
                        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
       Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        initChars: typing.Optional[str] = deprecate_argument(kwargs, "initChars", None)
        bodyChars: typing.Optional[str] = deprecate_argument(kwargs, "bodyChars", None)
        asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        excludeChars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        # deprecated spellings, when given and truthy, win over the new names
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.init_chars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set: body characters are the initial characters
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides whatever min/max were given
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # Work on the stored sets directly; the deprecated ``initChars``
        # property would build a new set on every access.
        init_set = self.init_chars
        body_set = self.bodyChars

        # see if we can make a regex for this Word
        if " " not in (init_set | body_set):
            if len(init_set) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(init_set)}]"

            if body_set == init_set:
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(body_set)}]"
                    # the leading fragment already consumes one character, so
                    # body repeat counts are one less than the word lengths
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the pure-Python parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    @property
    def initChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use `init_chars` instead.

        Property returning the initial chars to be used when matching this
        Word expression. If no body chars were specified, the initial characters
        will also be the body characters.
        """
        return set(self.init_chars)

    def copy(self) -> Word:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            # rebind the regex fast path to the copy's own bound method
            ret.re_match = self.re_match
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        def charsAsStr(s):
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return f"{s[:max_repr_len - 3]}..."

            return s

        init_set = self.init_chars
        if init_set != self.bodyChars:
            base = f"W:({charsAsStr(init_set)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(init_set)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single character: drop the leading "W:"
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Pure-Python matcher, used when no regex fast path was installed.

        Returns ``(end_loc, matched_text)`` or raises :class:`ParseException`.
        """
        # membership test on the stored set; no per-call set copy
        if instring[loc] not in self.init_chars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            # too short
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # an explicit max was given and the word continues past it
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            # keyword mode: a body character adjoins the match
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed by ``__init__`` when the Word could
        be compiled to a regular expression."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result[0]

3376 

3377 

class Char(Word):
    """Shorthand for :class:`Word` ``(characters, exact=1)``: matches
    exactly one character taken from the given string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        # Deprecated spellings, when given and truthy, win over the new names.
        legacy_as_keyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        legacy_exclude: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        super().__init__(
            charset,
            exact=1,
            as_keyword=legacy_as_keyword or as_keyword,
            exclude_chars=legacy_exclude or exclude_chars,
        )

3401 

3402 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ documentation for an
    explanation of the acceptable patterns and flags.

    Example:

    .. testcode::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept regular expressions compiled using the
        # re module
        import re
        parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        **kwargs,
    ) -> None:
        """Create a Regex expression.

        :param pattern: a non-empty pattern string, a compiled RE object
            (anything with ``pattern`` and ``match`` attributes), or a
            callable taking no arguments that returns one of those
        :param flags: flags used when a string pattern is compiled
        :param as_group_list: return the match's ``groups()`` as the result
        :param as_match: return the match object itself as the result
        :raises ValueError: if ``pattern`` is an empty string
        :raises TypeError: if ``pattern`` is none of the accepted kinds
        """
        super().__init__()
        asGroupList: bool = deprecate_argument(kwargs, "asGroupList", False)
        asMatch: bool = deprecate_argument(kwargs, "asMatch", False)

        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first read
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self) -> Regex:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Regex = cast(Regex, super().copy())
        # rebind the specialized parseImpl to the copy's own bound method
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch  # type: ignore [method-assign]
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """
        Property returning the compiled regular expression for this Regex.

        Also fills in the may-return-empty flag. A freshly compiled string
        pattern always sets it; a precompiled pattern (given directly or
        returned by a callable) sets it only if it is still undetermined.

        Generally only used internally by pyparsing.

        :raises ValueError: if a string pattern fails to compile
        """
        if self._re:
            compiled = self._re
        else:
            if callable(self.pattern):
                # replace self.pattern with the string returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, use it as-is
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                compiled = cast(re.Pattern[str], self.pattern)
                self._re = compiled
                self.pattern = self.reString = compiled.pattern
            else:
                try:
                    compiled = re.compile(self.pattern, self.flags)
                except re.error as err:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from err
                self._re = compiled
                # Use the local compiled object; reading self.re here would
                # re-enter this property while it is still being computed.
                self._may_return_empty = compiled.match("") is not None
                return compiled

        if self._may_return_empty is None:
            # precompiled patterns never went through the branch above, so
            # without this mayReturnEmpty would stay None for them
            self._may_return_empty = compiled.match("") is not None
        return compiled

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        return self.re.match

    @property
    def mayReturnEmpty(self):
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result[0])
        d = result.groupdict()

        # expose named groups as named results
        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Variant of ``parseImpl`` that returns the match's ``groups()``."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Variant of ``parseImpl`` that returns the match object itself."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        The parse action is added to this expression itself, which is then
        returned.

        Example:

        .. testcode::

           make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
           print(make_html.transform_string("h1:main title:"))

        .. testoutput::

           <h1>main title</h1>

        :raises TypeError: if this Regex was built with ``as_group_list=True``,
            or with ``as_match=True`` and ``repl`` is callable
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # tokens[0] is the match object; expand the template on it
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                # tokens[0] is the matched text; re-run the substitution on it
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3626 

3627 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None``  => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
       ``unquote_results`` is ``False``.

    Raises ``ValueError`` if ``quote_char`` or ``end_quote_char`` is empty
    after stripping whitespace.

    Example:

    .. doctest::

        >>> qs = QuotedString('"')
        >>> print(qs.search_string('lsjdf "This is the quote" sldjf'))
        [['This is the quote']]
        >>> complex_qs = QuotedString('{{', end_quote_char='}}')
        >>> print(complex_qs.search_string(
        ...     'lsjdf {{This is the "quote"}} sldjf'))
        [['This is the "quote"']]
        >>> sql_qs = QuotedString('"', esc_quote='""')
        >>> print(sql_qs.search_string(
        ...     'lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
        [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace sequences recognized while unquoting, mapped to the
    # whitespace character each one stands for
    ws_map = {r"\t": "\t", r"\n": "\n", r"\f": "\f", r"\r": "\r"}

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        **kwargs,
    ) -> None:
        super().__init__()
        # pre-PEP8 keyword names are still accepted through **kwargs
        quoteChar: str = deprecate_argument(kwargs, "quoteChar", "")
        escChar: str = deprecate_argument(kwargs, "escChar", None)
        escQuote: str = deprecate_argument(kwargs, "escQuote", None)
        unquoteResults: bool = deprecate_argument(kwargs, "unquoteResults", True)
        endQuoteChar: typing.Optional[str] = deprecate_argument(
            kwargs, "endQuoteChar", None
        )
        convertWhitespaceEscapes: bool = deprecate_argument(
            kwargs, "convertWhitespaceEscapes", True
        )

        # merge legacy and current spellings of each argument
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of a multi-character end quote, as long
            # as it is not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line form additionally excludes line breaks
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                # NOTE: the repetition counts below are written with doubled
                # braces because these are f-strings; a single-brace "{3}"
                # would be evaluated as a replacement field and inserted as
                # the literal digit "3" instead of acting as a quantifier.
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    rf"|(\\[0-7]{{3}}|\\0|\\x[0-9a-fA-F]{{2}}|\\u[0-9a-fA-F]{{4}})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        """Build the default display name from the quote delimiters."""
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``.

        Returns ``(new_loc, text)``; ``text`` is the matched string with the
        quotes removed and escapes processed when ``unquote_results`` is set,
        otherwise the raw matched text including the delimiters.  Raises
        ``ParseException`` if no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            self.re_match(instring, loc)
            if instring[loc] == self.first_quote_char
            else None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result[0]

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[g] if (g := match[1])
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else _convert_escaped_numerics_to_char(g[1:]) if (g := match[2])
                        # match group 3 matches escaped characters
                        else g[-1] if (g := match[3])
                        # match group 4 matches any character
                        else match[4]
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        g[-1] if (g := match[1])
                        # match group 2 matches any character
                        else match[2]
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3861 

3862 

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* members of a given
    set.  Whitespace is included in the match unless it is listed in the
    exclusion set (see example).  Construct with a string of all disallowed
    characters plus an optional minimum, maximum, and/or exact length.
    ``min`` defaults to 1 (values < 1 are rejected); ``max`` and ``exact``
    default to 0, meaning no maximum or exact length restriction.

    Example:

    .. testcode::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(
            DelimitedList(csv_value).parse_string(
                "dkls,lsdkjf,s12 34,@!#,213"
            )
        )

    prints:

    .. testoutput::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self, not_chars: str = "", min: int = 1, max: int = 0, exact: int = 0, **kwargs
    ) -> None:
        super().__init__()
        # legacy camelCase spelling of not_chars
        notChars: str = deprecate_argument(kwargs, "notChars", "")

        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Build the ``!W:(...)`` default name, truncating long exclusion sets."""
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            shown = self.notChars
        else:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume characters from ``loc`` until an excluded one is reached.

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` if the
        first character is excluded or fewer than ``minLen`` were consumed.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        consumed = loc - begin
        if consumed < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3945 

3946 

class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display labels used when building the default name of a White
    # expression, keyed by whitespace character
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00a0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180e": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200a": "<HAIR_SPACE>",
        "\u200b": "<ZERO_WIDTH_SPACE>",
        "\u202f": "<NNBSP>",
        "\u205f": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws
        # every known whitespace character that is NOT being matched is
        # treated as skippable whitespace for this expression
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        # self.leave_whitespace()
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Build the default name by joining the label of each matched character.

        Characters that have no entry in ``whiteStrs`` are shown as
        ``<U+XXXX>`` instead of raising ``KeyError``.
        """
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a run of the configured whitespace characters at ``loc``.

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` if the
        character at ``loc`` is not matchable or the run is shorter than
        ``minLen``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        # never scan past maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

4024 

4025 

class PositionToken(Token):
    """Common base for the position-matching tokens defined below it.

    Instances are flagged as able to match empty and as not raising
    ``IndexError`` from ``parseImpl``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

4031 

4032 

class GoToColumn(PositionToken):
    """Token that advances to a specific column of the input text; handy
    when scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorables and whitespace until the target column is reached."""
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        # advance over whitespace only, stopping at the target column
        while loc < end:
            if not instring[loc].isspace():
                break
            if col(loc, instring) == target:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column.

        Raises ``ParseException`` if ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]

4065 

4066 

class LineStart(PositionToken):
    r"""Matches when the current position is the logical start of a line
    (after skipping whitespace) within the parse string

    Example:

    .. testcode::

        test = '''\
        AAA this line
        AAA and this line
          AAA and even this line
          B AAA but definitely not this line
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['AAA', ' this line']
        ['AAA', ' and this line']
        ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used to skip whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Skip leading whitespace (and newlines, if they were originally
        whitespace) before testing for the start of a line.
        """
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)
        if "\n" not in self.orig_whiteChars:
            return pos

        # step over each newline, then skip the whitespace that follows it
        while instring[pos : pos + 1] == "\n":
            pos = self.skipper.preParse(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4119 

4120 

class LineEnd(PositionToken):
    """Matches when the current position is at the end of a line within
    the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # a newline must not be skipped as whitespace, or it could never match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc``, or the end of the input.

        Returns ``(loc + 1, "\\n")`` for a newline and ``(loc + 1, [])`` at
        the exact end of the string; raises ``ParseException`` otherwise.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

4142 

4143 

class StringStart(PositionToken):
    """Matches when the current position is at the beginning of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or at the position reached
        by pre-parsing from position 0.
        """
        # anything before loc must be only whitespace and ignorables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

4159 

4160 

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or beyond the end of input.

        Returns ``(loc + 1, [])`` exactly at the end of the string and
        ``(loc, [])`` when already past it; raises ``ParseException`` while
        input remains.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end: already past the end, do not advance any further
        return loc, []

4179 

4180 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # legacy camelCase spelling of word_chars
        wordChars: str = deprecate_argument(kwargs, "wordChars", printables)

        # the legacy argument wins only when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or where a word character
        follows a non-word character; raise ``ParseException`` otherwise.
        """
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4207 

4208 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # legacy camelCase spelling of word_chars
        wordChars: str = deprecate_argument(kwargs, "wordChars", printables)

        # the legacy argument wins only when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or past the end of the input, or where
        a word character is followed by a non-word character; raise
        ``ParseException`` otherwise.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                # no character precedes position 0, so no word can end there;
                # checking explicitly also keeps instring[loc - 1] from
                # wrapping around to the last character of the input
                loc == 0
                or instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4236 

4237 

class Tag(Token):
    """
    A meta-element that inserts a named result into the parsed tokens,
    so it can be checked later in a parse action or when processing the
    parsed results. The tag value is optional and defaults to `True`.

    Example:

    .. doctest::

        >>> end_punc = "." | ("!" + Tag("enthusiastic"))
        >>> greeting = "Hello," + Word(alphas) + end_punc

        >>> result = greeting.parse_string("Hello, World.")
        >>> print(result.dump())
        ['Hello,', 'World', '.']

        >>> result = greeting.parse_string("Hello, World!")
        >>> print(result.dump())
        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self._may_return_empty = True
        self.mayIndexError = False
        self.leave_whitespace()
        # the tag is written into the results by a parse action
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        """Build the default name as ``ClassName:tag_name=repr(tag_value)``."""
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

4279 

4280 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            items = list(exprs)
            # if sequence of strings provided, wrap with Literal
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in items
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """
        Append an expression to this ParseExpression's list of contained
        expressions, and return this instance.
        """
        self.exprs.append(other)
        # the cached default name is reset because the contents changed
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class; when ``recursive``
        is set, also invokes ``leave_whitespace`` on copies of all contained
        expressions.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        self.exprs = [sub_expr.copy() for sub_expr in self.exprs]
        for sub_expr in self.exprs:
            sub_expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class; when ``recursive``
        is set, also invokes ``ignore_whitespace`` on copies of all contained
        expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        self.exprs = [sub_expr.copy() for sub_expr in self.exprs]
        for sub_expr in self.exprs:
            sub_expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        # a Suppress that is already being ignored is not added again
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for sub_expr in self.exprs:
            sub_expr.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        """Build the default name from the class name and contained expressions."""
        cls_name = type(self).__name__
        return f"{cls_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline contained expressions and flatten same-class nesting."""
        if self.streamlined:
            return self

        super().streamline()

        for sub_expr in self.exprs:
            sub_expr.streamline()

        def can_absorb(candidate) -> bool:
            # only plain nested expressions of our own class may be merged:
            # no parse actions, no results name, no debugging
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head, tail = self.exprs
            if can_absorb(head):
                self.exprs[:] = (*head.exprs, tail)
                self._defaultName = None
                self._may_return_empty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            trailing = self.exprs[-1]
            if can_absorb(trailing):
                self.exprs[-1:] = trailing.exprs
                self._defaultName = None
                self._may_return_empty |= trailing.mayReturnEmpty
                self.mayIndexError |= trailing.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a deprecation warning, then validates the
        contained expressions and checks for recursion.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        trace = ([] if validateTrace is None else validateTrace[:]) + [self]
        for sub_expr in self.exprs:
            sub_expr.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression, with each contained expression
        copied as well.

        Generally only used internally by pyparsing.
        """
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [sub_expr.copy() for sub_expr in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when that diagnostic is
        enabled) if a contained expression already has a results name.
        """
        diagnostic_on = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        )
        if not diagnostic_on:
            return super()._setResultsName(name, list_all_matches)

        # first contained expression that has its own results name and has
        # not suppressed this warning
        offender = next(
            (
                sub_expr
                for sub_expr in self.exprs
                if isinstance(sub_expr, ParserElement)
                and sub_expr.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in sub_expr.suppress_warnings_
                )
            ),
            None,
        )
        if offender is not None:
            warning = (
                "warn_ungrouped_named_tokens_in_collection:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                f" collides with {offender.resultsName!r} on contained expression"
            )
            # warn from this frame so that stacklevel=3 keeps its meaning
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4465 

4466 

4467class And(ParseExpression): 

4468 """ 

4469 Requires all given :class:`ParserElement` s to be found in the given order. 

4470 Expressions may be separated by whitespace. 

4471 May be constructed using the ``'+'`` operator. 

4472 May also be constructed using the ``'-'`` operator, which will 

4473 suppress backtracking. 

4474 

4475 Example: 

4476 

4477 .. testcode:: 

4478 

4479 integer = Word(nums) 

4480 name_expr = Word(alphas)[1, ...] 

4481 

4482 expr = And([integer("id"), name_expr("name"), integer("age")]) 

4483 # more easily written as: 

4484 expr = integer("id") + name_expr("name") + integer("age") 

4485 """ 

4486 

4487 class _ErrorStop(Empty): 

4488 def __init__(self, *args, **kwargs) -> None: 

4489 super().__init__(*args, **kwargs) 

4490 self.leave_whitespace() 

4491 

4492 def _generateDefaultName(self) -> str: 

4493 return "-" 

4494 

4495 def __init__( 

4496 self, 

4497 exprs_arg: typing.Iterable[Union[ParserElement, str]], 

4498 savelist: bool = True, 

4499 ) -> None: 

4500 # instantiate exprs as a list, converting strs to ParserElements 

4501 exprs: list[ParserElement] = [ 

4502 self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg 

4503 ] 

4504 

4505 # convert any Ellipsis elements to SkipTo 

4506 if Ellipsis in exprs: 

4507 

4508 # Ellipsis cannot be the last element 

4509 if exprs[-1] is Ellipsis: 

4510 raise Exception("cannot construct And with sequence ending in ...") 

4511 

4512 tmp: list[ParserElement] = [] 

4513 for cur_expr, next_expr in zip(exprs, exprs[1:]): 

4514 if cur_expr is Ellipsis: 

4515 tmp.append(SkipTo(next_expr)("_skipped*")) 

4516 else: 

4517 tmp.append(cur_expr) 

4518 

4519 exprs[:-1] = tmp 

4520 

4521 super().__init__(exprs, savelist) 

4522 if self.exprs: 

4523 self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs) 

4524 if not isinstance(self.exprs[0], White): 

4525 self.set_whitespace_chars( 

4526 self.exprs[0].whiteChars, 

4527 copy_defaults=self.exprs[0].copyDefaultWhiteChars, 

4528 ) 

4529 self.skipWhitespace = self.exprs[0].skipWhitespace 

4530 else: 

4531 self.skipWhitespace = False 

4532 else: 

4533 self._may_return_empty = True 

4534 self.callPreparse = True 

4535 

def streamline(self) -> ParserElement:
    """
    Flatten nested `And` expressions, so that `And(And(And(A, B), C), D)`
    becomes `And(A, B, C, D)`.

    .. doctest::

        >>> expr = Word("A") + Word("B") + Word("C") + Word("D")
        >>> # Using '+' operator creates nested And expression
        >>> expr
        {{{W:(A) W:(B)} W:(C)} W:(D)}
        >>> # streamline simplifies to a single And with multiple expressions
        >>> expr.streamline()
        {W:(A) W:(B) W:(C) W:(D)}

    Expressions that carry special features, such as results names or
    parse actions, are guarded against being collapsed away.

    Pending Skip commands that were defined using `...` terms are resolved.
    """

    def ends_with_pending_skip(candidate) -> bool:
        # True for a ParseExpression whose final sub-expression is a _PendingSkip
        return bool(
            isinstance(candidate, ParseExpression)
            and candidate.exprs
            and isinstance(candidate.exprs[-1], _PendingSkip)
        )

    # collapse any _PendingSkip's: each one absorbs the expression that
    # follows its owner, and that follower is then dropped from this And
    if self.exprs and any(
        ends_with_pending_skip(item) for item in self.exprs[:-1]
    ):
        tombstone = NoMatch()
        # iterate over a snapshot; self.exprs itself is edited inside the loop
        for index, item in enumerate(self.exprs[:-1]):
            if item is tombstone:
                continue
            if ends_with_pending_skip(item):
                follower_index = index + 1
                item.exprs[-1] = item.exprs[-1] + self.exprs[follower_index]
                self.exprs[follower_index] = tombstone
        self.exprs = [item for item in self.exprs if item is not tombstone]

    super().streamline()

    # link any IndentedBlocks to the prior expression
    for anchor_expr, follower in zip(self.exprs, self.exprs[1:]):
        # walk follower, then the first embedded expr of each level, looking
        # for an IndentedBlock; the id set stops us looping on recursive grammars
        visited: set[int] = set()
        node = follower
        while id(node) not in visited:
            visited.add(id(node))
            if isinstance(node, IndentedBlock):
                # when the prior expression matches, record its column on the block
                anchor_expr.add_parse_action(
                    lambda s, l, t, cur_=node: setattr(
                        cur_, "parent_anchor", col(l, s)
                    )
                )
                break
            first_child = next(iter(node.recurse()), None)
            if first_child is None:
                break
            node = typing.cast(ParserElement, first_child)

    self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
    return self

4604 

def parseImpl(self, instring, loc, do_actions=True):
    """
    Match every contained expression in order, starting at ``loc``, and
    return the final location together with the accumulated tokens.

    Once an ``And._ErrorStop`` marker has been passed, a failure in any
    later expression is raised as a ``ParseSyntaxException``.
    """
    # with no exprs defined, behave as though we hold a single Empty
    # (in keeping with `all([])` evaluating to True)
    remaining = iter(self.exprs or (Empty(),))

    # the And's own pre-parse has already been applied to the string, so the
    # first element is told not to pre-parse again (callPreParse=False)
    first = next(remaining)
    loc, resultlist = first._parse(instring, loc, do_actions, callPreParse=False)

    past_error_stop = False
    for expr in remaining:
        # exact type match on the marker class, not an isinstance check
        if type(expr) is And._ErrorStop:
            past_error_stop = True
            continue

        if not past_error_stop:
            loc, exprtokens = expr._parse(instring, loc, do_actions)
        else:
            try:
                loc, exprtokens = expr._parse(instring, loc, do_actions)
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException._from_exception(pe)
            except IndexError:
                raise ParseSyntaxException(
                    instring, len(instring), self.errmsg, self
                )
        resultlist += exprtokens
    return loc, resultlist

4641 

def __iadd__(self, other):
    """
    Implement ``self += other`` by appending ``other`` to this expression.

    A string operand is first wrapped with ``self._literalStringClass``;
    any operand that is still not a ``ParserElement`` yields
    ``NotImplemented``.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return self.append(other)  # And([self, other])
    return NotImplemented

4648 

4649 def _checkRecursion(self, parseElementList): 

4650 subRecCheckList = parseElementList[:] + [self] 

4651 for e in self.exprs: 

4652 e._checkRecursion(subRecCheckList) 

4653 if not e.mayReturnEmpty: 

4654 break 

4655 

4656 def _generateDefaultName(self) -> str: 

4657 inner = " ".join(str(e) for e in self.exprs) 

4658 # strip off redundant inner {}'s 

4659 while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": 

4660 inner = inner[1:-1] 

4661 return f"{{{inner}}}" 

4662 

4663 

class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. When
    two expressions match, the one that matches the longest string is
    used. May be constructed using the ``'^'`` operator.

    Example:

    .. testcode::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints:

    .. testoutput::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """
        Streamline via the base class, then recompute the flags that are
        derived from the alternatives.
        """
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Try every alternative at ``loc`` and return the result of the one
        that matches the most input.

        Alternatives are first probed with ``try_parse``; the successful
        ones are then re-parsed from longest to shortest. If nothing can be
        returned, the fatal exception with the greatest location is raised,
        else the non-fatal exception with the greatest location, else a
        generic "no defined alternatives" exception.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []

        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                fatals.append(pfe)
                # a fatal error discards any non-fatal error tracked so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # remember each match, to be retried longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # re-evaluate all matches in descending order of match length, in
            # case attached actions change whether or how much they match
            ranked = sorted(candidates, key=itemgetter(0), reverse=True)

            if not do_actions:
                # without conditions or parse actions nothing can alter the
                # choice, so the first (longest) match is the best match
                best_expr = ranked[0][1]
                return best_expr._parse(instring, loc, do_actions)

            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for probed_loc, candidate in ranked:
                if probed_loc <= longest[0]:
                    # a longer match than this one can deliver is already held
                    return longest

                try:
                    actual_loc, toks = candidate._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_loc >= probed_loc:
                        return actual_loc, toks
                    # matched less than it did when probed
                    if actual_loc > longest[0]:
                        longest = actual_loc, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # reverse=True keeps equal-loc entries in their original order
                fatals.sort(key=lambda f: f.loc, reverse=True)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer element description
                    fatals.sort(
                        key=lambda f: (f.loc, len(str(f.parser_element))),
                        reverse=True,
                    )
            raise fatals[0]

        if furthest_exc is not None:
            # when the furthest failure sits at the parse start, every
            # alternative failed at the current position, so emit this
            # collective error message instead of any single one
            parse_start_loc = self.preParse(instring, loc)
            if furthest_loc == parse_start_loc:
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Implement ``self ^= other`` by appending ``other`` as an alternative."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " ^ ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """
        Set the results name, first emitting the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic when it is
        enabled, not suppressed here, and some alternative is an ``And``
        that does not suppress it either.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4824 

4825 

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. When
    more than one expression matches, the first one listed is the one that
    will match. May be constructed using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

        # watch the order of expressions to match
        >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        >>> print(number.search_string("123 3.1416 789")) # Fail!
        [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        >>> print(number.search_string("123 3.1416 789")) # Better
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """
        Streamline via the base class (at most once), then recompute the
        flags that are derived from the alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` from an alternative is re-raised at once.
        If every alternative fails, the exception with the greatest
        location is raised; with no alternatives at all, a generic
        "no defined alternatives" exception is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # when the furthest failure sits at the parse start, every alternative
        # failed at the current position, so emit this collective error
        # message instead of any individual one
        parse_start_loc = self.preParse(instring, loc)
        if furthest_loc == parse_start_loc:
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Implement ``self |= other`` by appending ``other`` as an alternative."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " | ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """
        Set the results name, first emitting the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic when it is
        enabled, not suppressed here, and some alternative is an ``And``
        that does not suppress it either.
        """
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4936 

4937 

class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example:

    .. testcode::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: 'BLACK'
        - posn: ['100', ',', '120']
          - x: '100'
          - y: '120'
        - shape: 'SQUARE'
        ...

        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
         'posn:', ['50', ',', '80']]
        - color: 'BLUE'
        - posn: ['50', ',', '80']
          - x: '50'
          - y: '80'
        - shape: 'CIRCLE'
        - size: '50'
        ...

        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
         'posn:', ['20', ',', '40']]
        - color: 'GREEN'
        - posn: ['20', ',', '40']
          - x: '20'
          - y: '40'
        - shape: 'TRIANGLE'
        - size: '20'
        ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        self._may_return_empty = (
            all(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
        )
        self.skipWhitespace = True
        # the expression groups are built lazily, on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``self &= other`` by appending ``other`` to this expression."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline via the base class, then recompute ``_may_return_empty``."""
        super().streamline()
        self._may_return_empty = (
            all(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the contained expressions in whatever order they occur.

        The first call sorts the expressions into required, optional and
        repeatable groups. Matching order is then discovered with repeated
        ``try_parse`` passes, after which the expressions are parsed for
        real in that order and their results are summed.
        """
        if self.initExprGroups:
            opt_wrappers = [e for e in self.exprs if isinstance(e, Opt)]
            # maps the id of a wrapped expression back to its Opt wrapper
            self.opt1map = {id(wrapper.expr): wrapper for wrapper in opt_wrappers}
            unwrapped_opts = [wrapper.expr for wrapper in opt_wrappers]
            implicit_opts = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = unwrapped_opts + implicit_opts
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            singles = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required = singles + self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        match_order: list[ParserElement] = []

        while True:
            attempt = pending_required + pending_optional + repeatables
            failure_count = 0
            # only the fatals from the final pass survive the loop
            fatals: list[ParseFatalException] = []
            for expr in attempt:
                try:
                    probe_loc = expr.try_parse(instring, probe_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = expr
                    fatals.append(pfe)
                    failure_count += 1
                except ParseException:
                    failure_count += 1
                else:
                    match_order.append(self.opt1map.get(id(expr), expr))
                    if expr in pending_required:
                        pending_required.remove(expr)
                    elif expr in pending_optional:
                        pending_optional.remove(expr)
            # a pass in which nothing matched ends the search
            if failure_count == len(attempt):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # reverse=True keeps equal-loc entries in their original order
                fatals.sort(key=lambda f: f.loc, reverse=True)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer element description
                    fatals.sort(
                        key=lambda f: (f.loc, len(str(f.parser_element))),
                        reverse=True,
                    )
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order.extend(
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        )

        total_results = ParseResults([])
        for expr in match_order:
            loc, results = expr._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return "{" + " & ".join(map(str, self.exprs)) + "}"

5121 

5122 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to a parser element
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the wrapped expression's parsing attributes
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the contained expression in a list, or an empty list if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """
        Parse with the contained expression, without a further pre-parse.

        Raises ``ParseException`` if no expression is defined. Other parse
        exceptions are re-raised after any unset ``pstr``, ``loc`` and
        ``parser_element`` fields have been filled in.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            # a custom-named non-Forward element reports its own message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes
        ``leave_whitespace`` on a copy of the contained expression.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes
        ``ignore_whitespace`` on a copy of the contained expression.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            if self.expr is not None:
                # hand on the last ignore expression held by this element
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and, if set, the contained expression."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        trail = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(trail)

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a deprecation warning, then checks for recursion."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []
        trace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5244 

5245 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).

    Example:

    .. testcode::

        '''
        BNF:
        statement ::= assignment_stmt | if_stmt
        assignment_stmt ::= identifier '=' rvalue
        rvalue ::= identifier | integer
        if_stmt ::= 'if' bool_condition block
        block ::= ([indent] statement)...
        identifier ::= [A..Za..z]
        integer ::= [0..9]...
        bool_condition ::= 'TRUE' | 'FALSE'
        '''

        IF, TRUE, FALSE = Keyword.using_each("IF TRUE FALSE".split())

        statement = Forward()
        identifier = Char(alphas)
        integer = Word(nums).add_parse_action(lambda t: int(t[0]))
        rvalue = identifier | integer
        assignment_stmt = identifier + "=" + rvalue

        if_stmt = IF + (TRUE | FALSE) + IndentedBlock(statement)

        statement <<= Group(assignment_stmt | if_stmt)

        result = if_stmt.parse_string('''
            IF TRUE
                a = 1000
                b = 2000
                IF FALSE
                    z = 100
            ''')
        print(result.dump())

    .. testoutput::

        ['IF', 'TRUE', [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]]
        [0]:
          IF
        [1]:
          TRUE
        [2]:
          [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]
          [0]:
            ['a', '=', 1000]
          [1]:
            ['b', '=', 2000]
          [2]:
            ['IF', 'FALSE', [['z', '=', 100]]]
            [0]:
              IF
            [1]:
              FALSE
            [2]:
              [['z', '=', 100]]
              [0]:
                ['z', '=', 100]
    """

    class _Indent(Empty):
        # empty match that only succeeds at exactly column ref_col
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # empty match that only succeeds at a column beyond ref_col
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column used for the trailing un-indent check; starts at column 1
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match one or more occurrences of the contained expression, each
        starting at the column where the first occurrence starts.
        """
        # move to the first non-whitespace position with an Empty();
        # its column is the one every subsequent indented line must use
        anchor_loc = Empty().preParse(instring, loc)

        # probe self.expr here first - if it does not match, this raises
        # and there is no further work to do
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        # grouped blocks are wrapped in a Group; otherwise used as they are
        wrapper = Group if self._grouped else (lambda expr: expr)  # type: ignore[misc, assignment]
        whole = wrapper(block) + Optional(trailing_undent)
        return whole.parseImpl(instring, anchor_loc, do_actions)

5369 

5370 

class AtStringStart(ParseElementEnhance):
    """Matches only when the expression matches at the very beginning of
    the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string(" 123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the contained expression, but only at location 0."""
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

5390 

5391 

class AtLineStart(ParseElementEnhance):
    r"""Matches only when an expression matches at the beginning of a line
    within the parse string

    Example:

    .. testcode::

        test = '''\
        BBB this line
        BBB and this line
          BBB but not this one
        A BBB and definitely not this one
        '''

        for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['BBB', ' this line']
        ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the contained expression, but only at column 1."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

5426 

5427 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string; it only verifies that the specified parse
    expression matches at the current position. ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example:

    .. testcode::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(
            label + Suppress(':')
            + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
        )

        attr_expr[1, ...].parse_string(
            "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the lookahead at ``loc`` and return ``loc`` unchanged."""
        # parse with self.expr, then empty the returned ParseResults list in
        # place: the tokens go, while any named results defined in the
        # FollowedBy expression are kept
        _, lookahead = self.expr._parse(instring, loc, do_actions=do_actions)
        del lookahead[:]

        return loc, lookahead

5470 

5471 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False
        # ``exact`` means the lookbehind distance is known up front, so
        # parseImpl can jump straight to ``loc - retreat`` instead of scanning
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # lookbehind never contributes tokens: empty the token list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Verify that the wrapped expression matches just before ``loc``.

        Returns ``(loc, tokens)`` with ``loc`` unchanged. Raises a
        :class:`ParseException` (or the last exception raised by the
        wrapped expression) when no lookbehind match is found.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        instring_slice = instring[max(0, loc - self.retreat) : loc]
        last_expr: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        # Try every start position inside the window, nearest first. The
        # offset must never exceed the window length: a larger offset yields
        # a negative start index, which wraps around to the end of the slice
        # and can produce bogus matches.
        for offset in range(1, len(instring_slice) + 1):
            try:
                _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                break
        else:
            raise last_expr

        return loc, ret

5552 

5553 

class Located(ParseElementEnhance):
    """
    Wraps an expression so that its result carries the start and end
    positions of the match within the input string.

    The returned result exposes these results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression and bundle its tokens with both locations."""
        begin = loc
        end, matched = self.expr._parse(instring, begin, do_actions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        for key, item in (
            ("locn_start", begin),
            ("value", matched),
            ("locn_end", end),
        ):
            located[key] = item

        # must return as a list when a results name is set, so that the name
        # will be attached to the complete group
        return end, ([located] if self.resultsName else located)

5597 

5598 

class NotAny(ParseElementEnhance):
    """
    Negative lookahead: succeeds only when the given parse expression
    does *not* match at the current position.
    ``NotAny`` does *not* advance the parsing position within the
    input string, and it does *not* skip over leading whitespace.
    ``NotAny`` always returns a null token list.
    May be constructed using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # Set the flag directly instead of calling self.leave_whitespace():
        # the setting must not propagate to the contained expressions.
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return ``(loc, [])`` unless the wrapped expression can parse at ``loc``."""
        if not self.expr.can_parse_next(instring, loc, do_actions=do_actions):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"

5642 

5643 

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repetition with an optional stop sentinel.

    Matches the wrapped expression one or more times, accumulating the
    tokens of every repetition. When a sentinel is configured (``stop_on``),
    repetition stops as soon as the sentinel would match at the current
    position.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pre-PEP8 spelling of the keyword, still accepted through **kwargs
        legacy_stop_on: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr)
        sentinel = legacy_stop_on or stop_on
        self.saveAsList = True
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stop_on(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that ends repetition.

        Returns ``self`` so that calls can be chained.
        """
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            # stored negated: parsing it succeeds only when the sentinel is absent
            self.not_ender = ~ender
        return self

    stopOn = stop_on

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the wrapped expression as many times as possible (at least once).

        A failure of the first repetition, or a sentinel match at the start
        position, propagates to the caller; a failure on any later
        repetition simply ends the loop.
        """
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        if self.not_ender is not None:
            assert_not_ender = self.not_ender.try_parse
        else:
            assert_not_ender = None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if assert_not_ender is not None:
            assert_not_ender(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if assert_not_ender is not None:
                    assert_not_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # no further repetition possible - keep what has been collected
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Attach a results name, warning first (if the diagnostic is enabled)
        when a contained expression already defines a results name of its own."""
        diag_flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag_flag not in self.suppress_warnings_
        ):
            # first contained expression that carries its own results name
            # and has not opted out of this diagnostic
            offender = next(
                (
                    e
                    for e in [self.expr] + self.expr.recurse()
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and diag_flag not in e.suppress_warnings_
                ),
                None,
            )
            if offender is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {offender.resultsName!r} on contained expression"
                )
                warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

5724 

5725 

class OneOrMore(_MultipleMatch):
    """
    Matches the given expression one or more times in succession.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word).set_parse_action(' '.join))

        >>> text = "shape: SQUARE posn: upper left color: BLACK"

        # Fail! read 'posn' as data instead of next label
        >>> attr_expr[1, ...].parse_string(text).pprint()
        [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore
        # to avoid reading label string as part of the data
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(
        ...         data_word, stop_on=label).set_parse_action(' '.join))
        >>> OneOrMore(attr_expr).parse_string(text).pprint()  # Better
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        >>> (attr_expr * (1,)).parse_string(text).pprint()
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        # rendered as "{expr}..."
        return "{" + str(self.expr) + "}..."

5769 

5770 

class ZeroOrMore(_MultipleMatch):
    """
    Matches the given expression any number of times, including not at all.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pre-PEP8 spelling of the keyword, still accepted through **kwargs
        legacy_stop_on: Union[ParserElement, str] = deprecate_argument(
            kwargs, "stopOn", None
        )
        sentinel = legacy_stop_on or stop_on

        super().__init__(expr, stop_on=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the one-or-more logic; a failed match becomes an empty result."""
        try:
            matched = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # zero repetitions: succeed at the original location with no tokens
            return loc, ParseResults([], name=self.resultsName)
        else:
            return matched

    def _generateDefaultName(self) -> str:
        # rendered as "[expr]..."
        return "[" + str(self.expr) + "]..."

5804 

5805 

class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    ``min`` and ``max`` optionally bound the number of list elements;
    ``min`` must be at least 1 and ``max`` may not be smaller than ``min``.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example:

    .. doctest::

        >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
        ParseResults(['aa', 'bb', 'cc'], {})
        >>> DelimitedList(Word(hexnums), delim=':', combine=True
        ...     ).parse_string("AA:BB:CC:DD:EE")
        ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the bounds before any state is stored
        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are kept when combining, otherwise suppressed from the results
        if combine or isinstance(delim, Suppress):
            self.delim = delim
        else:
            self.delim = Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # content (delim content)*  - the first element is written out
        # explicitly, so the repetition bounds are reduced by one
        extra_min = self.min - 1
        extra_max = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            extra_min,
            extra_max,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        element = self.content.streamline()
        return f"{element} [{self.raw_delim} {element}]..."

5877 

5878 

5879class _NullToken: 

5880 def __bool__(self): 

5881 return False 

5882 

5883 def __str__(self): 

5884 return "" 

5885 

5886 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    :param expr: expression that may match zero times or one time
    :param default: (optional) - value to be returned
        if the optional expression is not found.

    Example:

    .. testcode::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
        98765-
             ^
        ParseException: Expected end of text, found '-'  (at char 5), (line:1, col:6)
        FAIL: Expected end of text, found '-'  (at char 5), (line:1, col:6)
    """

    # private sentinel meaning "no default supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression; on failure fall back to the default.

        When the wrapped expression does not match, ``loc`` is returned
        unchanged together with either an empty token list (no default
        configured) or the default value, which is also stored under the
        wrapped expression's results name if it has one.
        """
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif wrapped.resultsName:
                tokens = ParseResults([fallback])
                tokens[wrapped.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "[" + inner + "]"


# Alternate name bound to the same class
Optional = Opt

5971 

5972 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param expr: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
        (the skipped text and target expression are returned
        as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
        (typically quoted strings and comments)
        that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
        are not allowed to be included in the skipped text;
        if found before the target expression is found,
        the :class:`SkipTo` is not a match

    Example:

    .. testcode::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints:

    .. testoutput::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pre-PEP8 spelling of the keyword, still accepted through **kwargs
        failOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "failOn", None
        )

        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper expression that carries the ignorable patterns used while scanning
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.

        Returns ``self`` so that calls can be chained.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the location of the target together with the skipped text
        (plus the target's own tokens when ``include`` was requested).
        Raises :class:`ParseException` if the end of the input is reached
        without finding the target.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the loop's else clause, so
                # no exception is raised here and the text skipped so far is
                # returned below - confirm this is the intended fail_on contract
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: actions are not run while searching for the target
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real (running actions if requested) and append its tokens
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

6137 

6138 

6139class Forward(ParseElementEnhance): 

6140 """ 

6141 Forward declaration of an expression to be defined later - 

6142 used for recursive grammars, such as algebraic infix notation. 

6143 When the expression is known, it is assigned to the ``Forward`` 

6144 instance using the ``'<<'`` operator. 

6145 

6146 .. Note:: 

6147 

6148 Take care when assigning to ``Forward`` not to overlook 

6149 precedence of operators. 

6150 

6151 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that:: 

6152 

6153 fwd_expr << a | b | c 

6154 

6155 will actually be evaluated as:: 

6156 

6157 (fwd_expr << a) | b | c 

6158 

6159 thereby leaving b and c out as parseable alternatives. 

6160 It is recommended that you explicitly group the values 

6161 inserted into the :class:`Forward`:: 

6162 

6163 fwd_expr << (a | b | c) 

6164 

6165 Converting to use the ``'<<='`` operator instead will avoid this problem. 

6166 

6167 See :meth:`ParseResults.pprint` for an example of a recursive 

6168 parser created using :class:`Forward`. 

6169 """ 

6170 

6171 def __init__( 

6172 self, other: typing.Optional[Union[ParserElement, str]] = None 

6173 ) -> None: 

6174 self.caller_frame = traceback.extract_stack(limit=2)[0] 

6175 super().__init__(other, savelist=False) # type: ignore[arg-type] 

6176 self.lshift_line = None 

6177 

6178 def __lshift__(self, other) -> Forward: 

6179 if hasattr(self, "caller_frame"): 

6180 del self.caller_frame 

6181 if isinstance(other, str_type): 

6182 other = self._literalStringClass(other) 

6183 

6184 if not isinstance(other, ParserElement): 

6185 return NotImplemented 

6186 

6187 self.expr = other 

6188 self.streamlined = other.streamlined 

6189 self.mayIndexError = self.expr.mayIndexError 

6190 self._may_return_empty = self.expr.mayReturnEmpty 

6191 self.set_whitespace_chars( 

6192 self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars 

6193 ) 

6194 self.skipWhitespace = self.expr.skipWhitespace 

6195 self.saveAsList = self.expr.saveAsList 

6196 self.ignoreExprs.extend(self.expr.ignoreExprs) 

6197 self.lshift_line = traceback.extract_stack(limit=2)[-2] # type: ignore[assignment] 

6198 return self 

6199 

6200 def __ilshift__(self, other) -> Forward: 

6201 if not isinstance(other, ParserElement): 

6202 return NotImplemented 

6203 

6204 return self << other 

6205 

6206 def __or__(self, other) -> ParserElement: 

6207 caller_line = traceback.extract_stack(limit=2)[-2] 

6208 if ( 

6209 __diag__.warn_on_match_first_with_lshift_operator 

6210 and caller_line == self.lshift_line 

6211 and Diagnostics.warn_on_match_first_with_lshift_operator 

6212 not in self.suppress_warnings_ 

6213 ): 

6214 warnings.warn( 

6215 "warn_on_match_first_with_lshift_operator:" 

6216 " using '<<' operator with '|' is probably an error, use '<<='", 

6217 PyparsingDiagnosticWarning, 

6218 stacklevel=2, 

6219 ) 

6220 ret = super().__or__(other) 

6221 return ret 

6222 

6223 def __del__(self): 

6224 # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<' 

6225 if ( 

6226 self.expr is None 

6227 and __diag__.warn_on_assignment_to_Forward 

6228 and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_ 

6229 ): 

6230 warnings.warn_explicit( 

6231 "warn_on_assignment_to_Forward:" 

6232 " Forward defined here but no expression attached later using '<<=' or '<<'", 

6233 UserWarning, 

6234 filename=self.caller_frame.filename, 

6235 lineno=self.caller_frame.lineno, 

6236 ) 

6237 

6238 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: 

6239 if ( 

6240 self.expr is None 

6241 and __diag__.warn_on_parse_using_empty_Forward 

6242 and Diagnostics.warn_on_parse_using_empty_Forward 

6243 not in self.suppress_warnings_ 

6244 ): 

6245 # walk stack until parse_string, scan_string, search_string, or transform_string is found 

6246 parse_fns = ( 

6247 "parse_string", 

6248 "scan_string", 

6249 "search_string", 

6250 "transform_string", 

6251 ) 

6252 tb = traceback.extract_stack(limit=200) 

6253 for i, frm in enumerate(reversed(tb), start=1): 

6254 if frm.name in parse_fns: 

6255 stacklevel = i + 1 

6256 break 

6257 else: 

6258 stacklevel = 2 

6259 warnings.warn( 

6260 "warn_on_parse_using_empty_Forward:" 

6261 " Forward expression was never assigned a value, will not parse any input", 

6262 PyparsingDiagnosticWarning, 

6263 stacklevel=stacklevel, 

6264 ) 

6265 if not ParserElement._left_recursion_enabled: 

6266 return super().parseImpl(instring, loc, do_actions) 

6267 # ## Bounded Recursion algorithm ## 

6268 # Recursion only needs to be processed at ``Forward`` elements, since they are 

6269 # the only ones that can actually refer to themselves. The general idea is 

6270 # to handle recursion stepwise: We start at no recursion, then recurse once, 

6271 # recurse twice, ..., until more recursion offers no benefit (we hit the bound). 

6272 # 

6273 # The "trick" here is that each ``Forward`` gets evaluated in two contexts 

6274 # - to *match* a specific recursion level, and 

6275 # - to *search* the bounded recursion level 

6276 # and the two run concurrently. The *search* must *match* each recursion level 

6277 # to find the best possible match. This is handled by a memo table, which 

6278 # provides the previous match to the next level match attempt. 

6279 # 

6280 # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al. 

6281 # 

6282 # There is a complication since we not only *parse* but also *transform* via 

6283 # actions: We do not want to run the actions too often while expanding. Thus, 

6284 # we expand using `do_actions=False` and only run `do_actions=True` if the next 

6285 # recursion level is acceptable. 

6286 with ParserElement.recursion_lock: 

6287 memo = ParserElement.recursion_memos 

6288 try: 

6289 # we are parsing at a specific recursion expansion - use it as-is 

6290 prev_loc, prev_result = memo[loc, self, do_actions] 

6291 if isinstance(prev_result, Exception): 

6292 raise prev_result 

6293 return prev_loc, prev_result.copy() 

6294 except KeyError: 

6295 act_key = (loc, self, True) 

6296 peek_key = (loc, self, False) 

6297 # we are searching for the best recursion expansion - keep on improving 

6298 # both `do_actions` cases must be tracked separately here! 

6299 prev_loc, prev_peek = memo[peek_key] = ( 

6300 loc - 1, 

6301 ParseException( 

6302 instring, loc, "Forward recursion without base case", self 

6303 ), 

6304 ) 

6305 if do_actions: 

6306 memo[act_key] = memo[peek_key] 

6307 while True: 

6308 try: 

6309 new_loc, new_peek = super().parseImpl(instring, loc, False) 

6310 except ParseException: 

6311 # we failed before getting any match - do not hide the error 

6312 if isinstance(prev_peek, Exception): 

6313 raise 

6314 new_loc, new_peek = prev_loc, prev_peek 

6315 # the match did not get better: we are done 

6316 if new_loc <= prev_loc: 

6317 if do_actions: 

6318 # replace the match for do_actions=False as well, 

6319 # in case the action did backtrack 

6320 prev_loc, prev_result = memo[peek_key] = memo[act_key] 

6321 del memo[peek_key], memo[act_key] 

6322 return prev_loc, copy.copy(prev_result) 

6323 del memo[peek_key] 

6324 return prev_loc, copy.copy(prev_peek) 

6325 # the match did get better: see if we can improve further 

6326 if do_actions: 

6327 try: 

6328 memo[act_key] = super().parseImpl(instring, loc, True) 

6329 except ParseException as e: 

6330 memo[peek_key] = memo[act_key] = (new_loc, e) 

6331 raise 

6332 prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek 

6333 

6334 def leave_whitespace(self, recursive: bool = True) -> ParserElement: 

6335 """ 

6336 Extends ``leave_whitespace`` defined in base class. 

6337 """ 

6338 self.skipWhitespace = False 

6339 return self 

6340 

6341 def ignore_whitespace(self, recursive: bool = True) -> ParserElement: 

6342 """ 

6343 Extends ``ignore_whitespace`` defined in base class. 

6344 """ 

6345 self.skipWhitespace = True 

6346 return self 

6347 

6348 def streamline(self) -> ParserElement: 

6349 if not self.streamlined: 

6350 self.streamlined = True 

6351 if self.expr is not None: 

6352 self.expr.streamline() 

6353 return self 

6354 

6355 def validate(self, validateTrace=None) -> None: 

6356 warnings.warn( 

6357 "ParserElement.validate() is deprecated, and should not be used to check for left recursion", 

6358 PyparsingDeprecationWarning, 

6359 stacklevel=2, 

6360 ) 

6361 if validateTrace is None: 

6362 validateTrace = [] 

6363 

6364 if self not in validateTrace: 

6365 tmp = validateTrace[:] + [self] 

6366 if self.expr is not None: 

6367 self.expr.validate(tmp) 

6368 self._checkRecursion([]) 

6369 

6370 def _generateDefaultName(self) -> str: 

6371 # Avoid infinite recursion by setting a temporary _defaultName 

6372 save_default_name = self._defaultName 

6373 self._defaultName = ": ..." 

6374 

6375 # Use the string representation of main expression. 

6376 try: 

6377 if self.expr is not None: 

6378 ret_string = str(self.expr)[:1000] 

6379 else: 

6380 ret_string = "None" 

6381 except Exception: 

6382 ret_string = "..." 

6383 

6384 self._defaultName = save_default_name 

6385 return f"{type(self).__name__}: {ret_string}" 

6386 

6387 def copy(self) -> ParserElement: 

6388 """ 

6389 Returns a copy of this expression. 

6390 

6391 Generally only used internally by pyparsing. 

6392 """ 

6393 if self.expr is not None: 

6394 return super().copy() 

6395 else: 

6396 ret = Forward() 

6397 ret <<= self 

6398 return ret 

6399 

def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """
    Assign a results name, first warning if no expression is attached yet.

    The ``warn_name_set_on_empty_Forward`` diagnostic is emitted only when
    it is enabled in ``__diag__``, it is not listed in this element's
    ``suppress_warnings_``, and ``self.expr`` is still ``None``.  The
    actual naming is delegated to the base class.
    """
    should_warn = (
        __diag__.warn_name_set_on_empty_Forward
        and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
        and self.expr is None
    )
    if should_warn:
        message = (
            "warn_name_set_on_empty_Forward:"
            f" setting results name {name!r} on {type(self).__name__} expression"
            " that has no contained expression"
        )
        warnings.warn(message, PyparsingDiagnosticWarning, stacklevel=3)

    return super()._setResultsName(name, list_all_matches)

6416 

# Compatibility synonyms
# camelCase aliases for the snake_case methods of this class, each built
# with replaced_by_pep8 (imported from .util) from the alias name and the
# method it stands for.
# fmt: off
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
# fmt: on

6422 

6423 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.

    The constructor wraps ``expr`` and initialises ``saveAsList`` to
    ``False``; subclasses in this module override that flag as needed.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but neither forwarded to the base class
        # nor stored (see the commented-out argument below).
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

6432 

6433 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into a single string.

    By default the matched pieces must also be contiguous in the input
    string; pass ``adjacent=False`` to the constructor to lift that
    requirement.

    Example:

    .. doctest::

        >>> real = Word(nums) + '.' + Word(nums)
        >>> print(real.parse_string('3.1416'))
        ['3', '.', '1416']

        >>> # will also erroneously match the following
        >>> print(real.parse_string('3. 1416'))
        ['3', '.', '1416']

        >>> real = Combine(Word(nums) + '.' + Word(nums))
        >>> print(real.parse_string('3.1416'))
        ['3.1416']

        >>> # no match when there are internal spaces
        >>> print(real.parse_string('3. 1416'))
        Traceback (most recent call last):
        ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)

        # the legacy camelCase keyword, when given, wins over ``join_string``
        if joinString is None:
            joinString = join_string

        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()

        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or
        other ignorable patterns.

        With ``adjacent`` set, ``ParserElement.ignore`` is called directly
        on this element; otherwise the inherited ``ignore`` is used.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from an emptied copy of the incoming results, then append
        # the one joined string
        combined = tokenlist.copy()
        del combined[:]

        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

6503 

6504 

class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    handy for keeping the tokens of :class:`ZeroOrMore` and
    :class:`OneOrMore` expressions together.

    When the optional ``aslist`` argument is True, the parsed tokens are
    returned as a Python list rather than as a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> ident = Word(alphas)
        >>> num = Word(nums)
        >>> term = ident | num
        >>> func = ident + Opt(DelimitedList(term))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', 'a', 'b', '100']

        >>> func = ident + Group(Opt(DelimitedList(term)))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        # default mode: wrap the results in a one-element list
        if not self._asPythonList:
            return [tokenlist]

        # aslist mode: convert to plain Python list contents first
        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.as_list()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

6542 

6543 

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as an item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')

        >>> attr_expr = (
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join)
        ... )

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> # print attributes as plain groups
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
        # Dict will auto-assign names.
        >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        # access named fields as dict entries, or output as dict
        >>> print(result['shape'])
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        for position, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            # the first token of each entry is its key; int keys become strings
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if entry_len == 1:
                # key only: store an empty-string value
                value = ""
            elif entry_len == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                # keep the remainder as-is unless it is a single un-keyed item
                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

6642 

6643 

class Suppress(TokenConverter):
    """Converter that discards the tokens produced by a parsed expression.

    Example:

    .. doctest::

        >>> source = "a, b, c,d"
        >>> wd = Word(alphas)
        >>> wd_list1 = wd + (',' + wd)[...]
        >>> print(wd_list1.parse_string(source))
        ['a', ',', 'b', ',', 'c', ',', 'd']

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        >>> wd_list2 = wd + (Suppress(',') + wd)[...]
        >>> print(wd_list2.parse_string(source))
        ['a', 'b', 'c', 'd']

        # Skipped text (using '...') can be suppressed as well
        >>> source = "lead in START relevant text END trailing text"
        >>> start_marker = Keyword("START")
        >>> end_marker = Keyword("END")
        >>> find_body = Suppress(...) + start_marker + ... + end_marker
        >>> print(find_body.parse_string(source))
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` wraps a pending skip that is resolved by the
        # next ``+`` or ``-``
        wrapped = _PendingSkip(NoMatch()) if expr is ... else expr
        super().__init__(wrapped)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # pending skip: suppress everything up to ``other``, then match it
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # drop whatever was matched
        return []

    def suppress(self) -> ParserElement:
        # already a Suppress; nothing further to wrap
        return self

6696 

6697 

6698# XXX: Example needs to be re-done for updated output 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each time the wrapped parse action is called, a line of the form
    ``">>entering <name>(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written to ``sys.stderr``.  When the parse action finishes, a line
    starting with ``"<<leaving"`` is written, followed by either the
    returned value or the type and message of the exception that the
    parse action raised (the exception is then re-raised).

    Example:

    .. testsetup:: stderr

        import sys
        sys.stderr = sys.stdout

    .. testcleanup:: stderr

        sys.stderr = sys.__stderr__

    .. testcode:: stderr

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
        :options: +NORMALIZE_WHITESPACE

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
            0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']

    .. versionchanged:: 3.1.0
        Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three positional arguments are (instring, loc, tokens);
        # an extra leading argument is reported as "<its type>.<name>"
        instring, loc, toks = paArgs[-3:]
        label = f.__name__
        if len(paArgs) > 3:
            label = f"{type(paArgs[0]).__name__}.{label}"

        log = sys.stderr.write
        log(f">>entering {label}(line: {line(loc, instring)!r}, {loc}, {toks!r})\n")
        try:
            result = f(*paArgs)
        except Exception as exc:
            log(f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n")
            raise
        else:
            log(f"<<leaving {label} (ret: {result!r})\n")
            return result

    z.__name__ = f.__name__
    return z

6762 

6763 

# convenience constants for positional expressions
# Each is a module-level instance of the corresponding expression class,
# given an explicit display name matching the constant's own name.
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

6770 

# Grammar pieces used by srange() to parse regex-style "[...]" character sets.

# A backslash followed by one punctuation character (or a space); the parse
# action yields the escaped character itself.
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# A hex escape: ``\x##`` (or legacy ``\0x##``), upper- or lower-case ``x``.
# The digits are taken as everything after the ``x``.  ``str.lstrip`` is not
# used here because it strips a *set* of characters, which would also eat
# leading ``0`` digits (turning ``\x00`` into an empty string) and would not
# remove an upper-case ``X``.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# An octal escape: ``\0`` followed by octal digits; everything after the
# backslash is interpreted as a base-8 number.
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# One character of the set: any escape form above, or a single character
# other than backslash and ``]``.
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range, grouped as [first, last] with the dash suppressed.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# Whole bracket expression: "[", optional "^" (results name "negate"),
# one or more ranges / single characters (results name "body"), then "]".
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6790 

6791 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be parsed or expanded, an empty string is returned.
    """

    def _expanded(p):
        # a grouped pair is a range to be enumerated; anything else is a
        # single character passed through unchanged
        if not isinstance(p, ParseResults):
            yield p
            return
        first, last = ord(p[0]), ord(p[1])
        for code_point in range(first, last + 1):
            yield chr(code_point)

    try:
        pieces = []
        for part in _reBracketExpr.parse_string(s).body:
            pieces.extend(_expanded(part))
        return "".join(pieces)
    except Exception:
        # best-effort helper: any failure yields an empty character set
        return ""

6831 

6832 

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. Any additional args are
    forwarded to the given function as additional arguments after the
    token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action takes its ``__name__`` from ``func`` (or
    from ``func``'s class when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # plain functions carry their own name; other callables fall back to
    # the name of their class
    try:
        pa.__name__ = func.__name__
    except AttributeError:
        pa.__name__ = func.__class__.__name__

    return pa

6877 

6878 

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable in the caller's frame that is a
    :class:`ParserElement` without a custom name is named after the
    variable it is bound to.  Does nothing when ``sys._getframe`` is
    unavailable.
    """

    # guard against _getframe not being implemented in the current Python
    if not hasattr(sys, "_getframe"):
        return
    caller = sys._getframe(1)
    if caller is None:
        return

    # find all locals in the calling frame that are ParserElements
    caller = typing.cast(types.FrameType, caller)
    for var_name, candidate in caller.f_locals.items():
        if not isinstance(candidate, ParserElement):
            continue
        # if a custom name is already defined, leave it alone
        if candidate.customName:
            continue
        candidate.set_name(var_name)

6897 

6898 

# Single-line quoted strings.  Each regex matches the opening quote and then
# any run of:
#   - a character other than the quote, newline, carriage return, or backslash
#   - a doubled quote ("" or '')
#   - a backslash escape: a backslash followed by any one character other
#     than "x", or by "x" and one or more hex digits
# The closing quote is a separate literal appended with "+", and Combine
# wraps the two pieces.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above; the same regexes are repeated inline and
# the alternatives are joined with "|".
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

6915 

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Four alternatives joined with "^":
#   1-2. triple-quoted strings (""" or '''): the body may contain any
#        character other than the quote or a backslash, one or two quote
#        characters not followed by enough quotes to form the closing
#        delimiter, or a backslash followed by a character matched by "."
#   3-4. single-line strings: like the forms used by quoted_string, except
#        that an embedded quote is written as a backslash-escaped quote
#        rather than a doubled quote
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

6932 

# A quoted string (either quote style) immediately preceded by a literal "u";
# a copy of quoted_string is used rather than the shared instance.
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# Character sets built with srange() from legacy ``\0x##`` hex escapes:
#   alphas8bit covers 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF
#   punc8bit   covers 0xA1-0xBF plus 0xD7 and 0xF7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6938 

# build list of built-in expressions, for future reference if a global default value
# gets updated
# This scans the namespace as it stands when this statement runs, keeping
# every value that is a ParserElement instance.
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6944 

# Compatibility synonyms
# camelCase aliases for the snake_case names.  The expression aliases are
# plain rebindings of the same objects; the function aliases are built with
# replaced_by_pep8 (imported from .util) from the alias name and the
# function it stands for.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on