Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 46%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2722 statements  

1# 

2# core.py 

3# 

4from __future__ import annotations 

5 

6import collections.abc 

7from collections import deque 

8import os 

9import typing 

10from typing import ( 

11 Any, 

12 Callable, 

13 Generator, 

14 NamedTuple, 

15 Sequence, 

16 TextIO, 

17 Union, 

18 cast, 

19) 

20from abc import ABC, abstractmethod 

21from enum import Enum 

22import string 

23import copy 

24import warnings 

25import re 

26import sys 

27from collections.abc import Iterable 

28import traceback 

29import types 

30from operator import itemgetter 

31from functools import wraps 

32from threading import RLock 

33from pathlib import Path 

34 

35from .warnings import PyparsingDeprecationWarning, PyparsingDiagnosticWarning 

36from .util import ( 

37 _FifoCache, 

38 _UnboundedCache, 

39 __config_flags, 

40 _collapse_string_to_ranges, 

41 _convert_escaped_numerics_to_char, 

42 _escape_regex_range_chars, 

43 _flatten, 

44 LRUMemo as _LRUMemo, 

45 UnboundedMemo as _UnboundedMemo, 

46 deprecate_argument, 

47 replaced_by_pep8, 

48) 

49from .exceptions import * 

50from .actions import * 

51from .results import ParseResults, _ParseResultsWithOffset 

52from .unicode import pyparsing_unicode 

53 

54_MAX_INT = sys.maxsize 

55str_type: tuple[type, ...] = (str, bytes) 

56 

57# 

58# Copyright (c) 2003-2022 Paul T. McGuire 

59# 

60# Permission is hereby granted, free of charge, to any person obtaining 

61# a copy of this software and associated documentation files (the 

62# "Software"), to deal in the Software without restriction, including 

63# without limitation the rights to use, copy, modify, merge, publish, 

64# distribute, sublicense, and/or sell copies of the Software, and to 

65# permit persons to whom the Software is furnished to do so, subject to 

66# the following conditions: 

67# 

68# The above copyright notice and this permission notice shall be 

69# included in all copies or substantial portions of the Software. 

70# 

71# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

72# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

73# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

74# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

75# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

76# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

77# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

78# 

79 

80from functools import cached_property 

81 

82 

class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # label for this group of flags
    # NOTE(review): presumably consumed by __config_flags when reporting - confirm
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # snapshot of every public (non-underscore) name defined so far in this
    # class body; must stay below the flag definitions it is meant to collect
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # names listed here are whitespace-split into a list
    # NOTE(review): presumably __config_flags refuses to change these - confirm
    _fixed_names = """
        collect_all_And_tokens
        """.split()

104 

105 

class __diag__(__config_flags):
    """
    Holder for the global diagnostic flags (all default to ``False``).

    Each flag has a same-named member in the :class:`Diagnostics` enum.
    """

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # snapshot of every public (non-underscore) name defined above; must stay
    # below the flag definitions it is meant to collect
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # flags split by prefix: "warn..." are warnings, "enable_debug..." are debug switches
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Call ``cls.enable`` for every flag whose name starts with ``warn``."""
        for name in cls._warning_names:
            cls.enable(name)

126 

127 

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.
    """

    # member names must match the flag names on __diag__: enable_diag() and
    # disable_diag() pass ``member.name`` straight through to it
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    # NOTE(review): this flag is not described in the docstring above; presumably
    # it warns when ``<<`` is combined with a ``|`` alternation - confirm and document
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

160 

161 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch on one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member naming the flag to enable
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

167 

168 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch off one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member naming the flag to disable
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

174 

175 

def enable_all_warnings() -> None:
    """
    Switch on every global pyparsing diagnostic warning in one call
    (the flags are listed in :class:`Diagnostics`).
    """
    __diag__.enable_all_warnings()
    return None

181 

182 

183# hide abstract class 

184del __config_flags 

185 

186 

187def _should_enable_warnings( 

188 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] 

189) -> bool: 

190 enable = bool(warn_env_var) 

191 for warn_opt in cmd_line_warn_options: 

192 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( 

193 ":" 

194 )[:5] 

195 if not w_action.lower().startswith("i") and ( 

196 not (w_message or w_category or w_module) or w_module == "pyparsing" 

197 ): 

198 enable = True 

199 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): 

200 enable = False 

201 return enable 

202 

203 

# at import time, turn on all diagnostic warnings if the interpreter's -W
# options or the PYPARSINGENABLEALLWARNINGS environment variable ask for it
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()


# build list of single arg builtins, that can be used as parse actions
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# (new location, tokens) pair returned by parseImpl methods
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition may accept 0 to 3 of the (s, loc, toks) arguments
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# called as fn(s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# called as fn(instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# called as fn(instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# called as fn(instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# character-set constants for building expressions
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every string.printable character that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])

242 

243 

244class _ParseActionIndexError(Exception): 

245 """ 

246 Internal wrapper around IndexError so that IndexErrors raised inside 

247 parse actions aren't misinterpreted as IndexErrors raised inside 

248 ParserElement parseImpl methods. 

249 """ 

250 

251 def __init__(self, msg: str, exc: BaseException) -> None: 

252 self.msg: str = msg 

253 self.exc: BaseException = exc 

254 

255 

_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    The returned wrapper is always called with the full argument list and
    works out how many leading arguments ``func`` does not accept:

    - builtins listed in ``_single_arg_builtins`` are called with the tokens
      argument only
    - otherwise ``func`` is tried with progressively fewer leading arguments
      (at most ``max_limit`` dropped) for as long as the call fails with a
      ``TypeError`` raised at the wrapper's own call line; the first call
      that succeeds fixes the arity for all later calls
    - a ``TypeError`` raised from inside ``func`` is re-raised unchanged
    - an ``IndexError`` raised by ``func`` is re-raised wrapped in
      :class:`_ParseActionIndexError`, on the first call and on every later
      call once the arity is known

    :param func: the user's parse action
    :param max_limit: maximum number of leading arguments to drop
    :returns: wrapper carrying ``func``'s name and docstring
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 14
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE ARITY-PROBING CALL TO FUNC INSIDE THE WHILE LOOP, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            try:
                return func(*args[limit:])
            except IndexError as ie:
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    # (filename, lineno) of the innermost extracted frame; it
                    # equals the synthesized call line only when the
                    # TypeError came from the call to func itself
                    trim_arity_type_error = (
                        frame_summary[:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

322 

323 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate returning ``True`` or ``False`` so that it can be used
    as a parse action. Useful wherever a parse action is required and
    :meth:`ParserElement.add_condition` cannot be used (such as when adding a
    condition to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    :param message: define a custom message to be used in the raised exception
    :param fatal: if ``True``, will raise :class:`ParseFatalException`
                  to stop parsing immediately;
                  otherwise will raise :class:`ParseException`

    """
    failure_message = "failed user-defined condition" if message is None else message
    failure_type = ParseFatalException if fatal else ParseException
    # let the predicate accept anything from 0 to 3 of (s, l, t)
    predicate = _trim_arity(fn)

    @wraps(predicate)
    def pa(s, l, t):
        if predicate(s, l, t):
            return
        raise failure_type(s, l, failure_message)

    return pa

351 

352 

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "try" debug action: print the expression about to be matched,
    its location (with line number and column), the source line, and a caret
    right-aligned to the column. Output starts with "*" on a cache hit.
    """
    prefix = "*" if cache_hit else ""
    line_number = lineno(loc, instring)
    column = col(loc, instring)
    source_line = line(loc, instring)
    report = (
        f"{prefix}Match {expr} at loc {loc}({line_number},{column})\n"
        f" {source_line}\n"
        f" {'^':>{column}}"
    )
    print(report)

364 

365 

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default "match" debug action: print the expression and the tokens it
    produced as a list. Output starts with "*" on a cache hit.
    """
    prefix = "*" if cache_hit else ""
    matched_tokens = toks.as_list()
    print(f"{prefix}Matched {expr} -> {matched_tokens}")

376 

377 

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default "fail" debug action: print the expression that failed, together
    with the class name and text of the exception. Output starts with "*" on
    a cache hit.
    """
    prefix = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print(f"{prefix}Match {expr} failed, {exc_name} raised: {exc}")

387 

388 

def null_debug_action(*args):
    """No-op debug action: accepts any arguments and does nothing, so that
    debugging output is suppressed during parsing."""
    return None

391 

392 

393class ParserElement(ABC): 

394 """Abstract base level parser element class.""" 

395 

396 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

397 verbose_stacktrace: bool = False 

398 _literalStringClass: type = None # type: ignore[assignment] 

399 

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of characters treated as skippable whitespace.

    Example:

    .. doctest::

        # default whitespace chars are space, newline, <TAB> and carriage return
        >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
        ParseResults(['abc', 'def', 'ghi', 'jkl'], {})

        # change to just treat newline as significant
        >>> ParserElement.set_default_whitespace_chars(" \t")
        >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
        ParseResults(['abc', 'def'], {})

        # Reset to default
        >>> ParserElement.set_default_whitespace_chars(" \n\t\r")
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # push the new default onto the module's built-in expressions that still
    # follow the default; each one gets its own fresh set
    following_default = (
        expr for expr in _builtin_exprs if expr.copyDefaultWhiteChars
    )
    for expr in following_default:
        expr.whiteChars = set(chars)

427 

428 @staticmethod 

429 def inline_literals_using(cls: type) -> None: 

430 """ 

431 Set class to be used for inclusion of string literals into a parser. 

432 

433 Example: 

434 

435 .. doctest:: 

436 :options: +NORMALIZE_WHITESPACE 

437 

438 # default literal class used is Literal 

439 >>> integer = Word(nums) 

440 >>> date_str = ( 

441 ... integer("year") + '/' 

442 ... + integer("month") + '/' 

443 ... + integer("day") 

444 ... ) 

445 

446 >>> date_str.parse_string("1999/12/31") 

447 ParseResults(['1999', '/', '12', '/', '31'], 

448 {'year': '1999', 'month': '12', 'day': '31'}) 

449 

450 # change to Suppress 

451 >>> ParserElement.inline_literals_using(Suppress) 

452 >>> date_str = ( 

453 ... integer("year") + '/' 

454 ... + integer("month") + '/' 

455 ... + integer("day") 

456 ... ) 

457 

458 >>> date_str.parse_string("1999/12/31") 

459 ParseResults(['1999', '12', '31'], 

460 {'year': '1999', 'month': '12', 'day': '31'}) 

461 

462 # Reset 

463 >>> ParserElement.inline_literals_using(Literal) 

464 """ 

465 ParserElement._literalStringClass = cls 

466 

@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Lazily build one ``cls(obj, **class_kwargs)`` instance per item of
    ``seq``, yielding them in order.

    Example:

    .. testcode::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    .. versionadded:: 3.1.0
    """
    for obj in seq:
        yield cls(obj, **class_kwargs)

481 

class DebugActions(NamedTuple):
    # Trio of optional debug callbacks; a slot left as None means no custom
    # action is set for that event.
    debug_try: typing.Optional[DebugStartAction]  # called before a match attempt
    debug_match: typing.Optional[DebugSuccessAction]  # NOTE(review): presumably called after a successful match - confirm
    debug_fail: typing.Optional[DebugExceptionAction]  # called when the match attempt raises

486 

487 def __init__(self, savelist: bool = False) -> None: 

488 self.parseAction: list[ParseAction] = list() 

489 self.failAction: typing.Optional[ParseFailAction] = None 

490 self.customName: str = None # type: ignore[assignment] 

491 self._defaultName: typing.Optional[str] = None 

492 self.resultsName: str = None # type: ignore[assignment] 

493 self.saveAsList: bool = savelist 

494 self.skipWhitespace: bool = True 

495 self.whiteChars: set[str] = set(ParserElement.DEFAULT_WHITE_CHARS) 

496 self.copyDefaultWhiteChars: bool = True 

497 # used when checking for left-recursion 

498 self._may_return_empty: bool = False 

499 self.keepTabs: bool = False 

500 self.ignoreExprs: list[ParserElement] = list() 

501 self.debug: bool = False 

502 self.streamlined: bool = False 

503 # optimize exception handling for subclasses that don't advance parse index 

504 self.mayIndexError: bool = True 

505 self.errmsg: Union[str, None] = "" 

506 # mark results names as modal (report only last) or cumulative (list all) 

507 self.modalResults: bool = True 

508 # custom debug actions 

509 self.debugActions = self.DebugActions(None, None, None) 

510 # avoid redundant calls to preParse 

511 self.callPreparse: bool = True 

512 self.callDuringTry: bool = False 

513 self.suppress_warnings_: list[Diagnostics] = [] 

514 self.show_in_diagram: bool = True 

515 

@property
def mayReturnEmpty(self) -> bool:
    """
    Read-through alias for ``_may_return_empty``.

    .. deprecated:: 3.3.0
        use _may_return_empty instead.
    """
    current = self._may_return_empty
    return current

@mayReturnEmpty.setter
def mayReturnEmpty(self, value) -> None:
    """
    Write-through alias for ``_may_return_empty``.

    .. deprecated:: 3.3.0
        use _may_return_empty instead.
    """
    setattr(self, "_may_return_empty", value)

531 

def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
    """
    Record that this expression should not emit the given diagnostic
    warning. Returns ``self`` so calls can be chained.

    Example:

    .. doctest::

        >>> label = pp.Word(pp.alphas)

        # Normally using an empty Forward in a grammar
        # would print a warning, but we can suppress that
        >>> base = pp.Forward().suppress_warning(
        ...     pp.Diagnostics.warn_on_parse_using_empty_Forward)

        >>> grammar = base | label
        >>> print(grammar.parse_string("x"))
        ['x']
    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self

553 

def visit_all(self):
    """Breadth-first generator over this expression and every expression
    reachable from it through ``recurse()``, each yielded once.
    Typically just for internal use.
    """
    pending = deque((self,))
    visited = set()
    while pending:
        expr = pending.popleft()

        # recursive grammars reach the same expression repeatedly; only
        # expand and yield it the first time
        if expr not in visited:
            visited.add(expr)
            pending.extend(expr.recurse())
            yield expr

570 

571 def copy(self) -> ParserElement: 

572 """ 

573 Make a copy of this :class:`ParserElement`. Useful for defining 

574 different parse actions for the same parsing pattern, using copies of 

575 the original parse element. 

576 

577 Example: 

578 

579 .. testcode:: 

580 

581 integer = Word(nums).set_parse_action( 

582 lambda toks: int(toks[0])) 

583 integerK = integer.copy().add_parse_action( 

584 lambda toks: toks[0] * 1024) + Suppress("K") 

585 integerM = integer.copy().add_parse_action( 

586 lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

587 

588 print( 

589 (integerK | integerM | integer)[1, ...].parse_string( 

590 "5K 100 640K 256M") 

591 ) 

592 

593 prints: 

594 

595 .. testoutput:: 

596 

597 [5120, 100, 655360, 268435456] 

598 

599 Equivalent form of ``expr.copy()`` is just ``expr()``: 

600 

601 .. testcode:: 

602 

603 integerM = integer().add_parse_action( 

604 lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

605 """ 

606 cpy = copy.copy(self) 

607 cpy.parseAction = self.parseAction[:] 

608 cpy.ignoreExprs = self.ignoreExprs[:] 

609 if self.copyDefaultWhiteChars: 

610 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 

611 return cpy 

612 

def set_results_name(
    self, name: str, list_all_matches: bool = False, **kwargs
) -> ParserElement:
    """
    Attach a name under which the matching tokens can be looked up as a
    nested attribute of the returned parse results.

    Results names behave like dict keys: a later value for the same name
    overwrites an earlier one. To keep every value captured for a results
    name, call ``set_results_name`` with ``list_all_matches`` = True.

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    You can also set results names using the abbreviated syntax,
    ``expr("name")`` in place of ``expr.set_results_name("name")``
    - see :meth:`__call__`. If ``list_all_matches`` is required, use
    ``expr("name*")``.

    Example:

    .. testcode::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # the pre-PEP8 spelling of the keyword is still accepted
    legacy_flag: bool = deprecate_argument(kwargs, "listAllMatches", False)
    return self._setResultsName(name, legacy_flag or list_all_matches)

650 

651 def _setResultsName(self, name, list_all_matches=False) -> ParserElement: 

652 if name is None: 

653 return self 

654 newself = self.copy() 

655 if name.endswith("*"): 

656 name = name[:-1] 

657 list_all_matches = True 

658 newself.resultsName = name 

659 newself.modalResults = not list_all_matches 

660 return newself 

661 

def set_break(self, break_flag: bool = True) -> ParserElement:
    """
    Method to invoke the Python pdb debugger when this element is
    about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
    disable.

    Enabling is idempotent: enabling an element that already has a
    breakpoint leaves it unchanged, so a single ``set_break(False)`` always
    restores the original parse method.

    :param break_flag: ``True`` to install the breakpoint, ``False`` to remove it
    :returns: ``self``, to allow chaining
    """
    already_wrapped = hasattr(self._parse, "_originalParseMethod")

    if break_flag:
        if already_wrapped:
            # wrapping again would nest breakers and need several
            # set_break(False) calls to undo
            return self

        _parseMethod = self._parse

        def breaker(instring, loc, do_actions=True, callPreParse=True):
            # this call to breakpoint() is intentional, not a checkin error
            breakpoint()
            return _parseMethod(instring, loc, do_actions, callPreParse)

        # remember the wrapped method so that it can be restored later
        breaker._originalParseMethod = _parseMethod  # type: ignore [attr-defined]
        self._parse = breaker  # type: ignore [method-assign]
    elif already_wrapped:
        self._parse = self._parse._originalParseMethod  # type: ignore [method-assign]
    return self

681 

def set_parse_action(
    self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
) -> ParserElement:
    """
    Replace this expression's parse actions with the given callables, to be
    run when the parse element definition matches successfully.

    Parse actions can be called to perform data conversions, do extra validation,
    update external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable method with 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The parsed tokens are passed to the parse action as ParseResults. They can be
    modified in place using list-style append, extend, and pop operations to update
    the parsed list elements; and with dictionary-style item set and del operations
    to add, update, or remove any named results. If the tokens are modified in place,
    it is not necessary to return them with a return statement.

    Parse actions can also completely replace the given tokens, with another ``ParseResults``
    object, or with some entirely different object (common for parse actions that perform data
    conversions). A convenient way to build a new parse result is to define the values
    using a dict, and then create the return value using :class:`ParseResults.from_dict`.

    If None is passed as the ``fn`` parse action, all previously added parse actions for this
    expression are cleared.

    Optional keyword arguments:

    :param call_during_try: (default= ``False``) indicate if parse action
                            should be run during lookaheads and alternate
                            testing. For parse actions that have side
                            effects, it is important to only call the parse
                            action once it is determined that it is being
                            called as part of a successful parse.
                            For parse actions that perform additional
                            validation, then ``call_during_try`` should
                            be passed as True, so that the validation code
                            is included in the preliminary "try" parses.

    .. Note::
        The default parsing behavior is to expand tabs in the input string
        before starting the parsing process.
        See :meth:`parse_string` for more information on parsing strings
        containing ``<TAB>`` s, and suggested methods to maintain a
        consistent view of the parsed string, the parse location, and
        line and column positions within the parsed string.

    Example: Parse dates in the form ``YYYY/MM/DD``
    -----------------------------------------------

    Setup code:

    .. testcode::

        def convert_to_int(toks):
            '''a parse action to convert toks from str to int
            at parse time'''
            return int(toks[0])

        def is_valid_date(instring, loc, toks):
            '''a parse action to verify that the date is a valid date'''
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

    Successful parse - note that integer fields are converted to ints:

    .. testcode::

        print(date_str.parse_string("1999/12/31"))

    prints:

    .. testoutput::

        [1999, '/', 12, '/', 31]

    Failure - invalid date:

    .. testcode::

        date_str.parse_string("1999/13/31")

    prints:

    .. testoutput::

        Traceback (most recent call last):
        ParseException: invalid date given, found '1999' ...
    """
    # the pre-PEP8 spelling of the keyword is still accepted
    callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

    # a lone None means "remove all parse actions"
    if list(fns) == [None]:
        self.parseAction.clear()
        return self

    for fn in fns:
        if not callable(fn):
            raise TypeError("parse actions must be callable")

    # replace the contents in place so the list object itself is kept
    self.parseAction[:] = list(map(_trim_arity, fns))
    self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry
    return self

797 

def add_parse_action(
    self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
) -> ParserElement:
    """
    Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.

    See examples in :class:`copy`.

    :param fns: callables to append, in order, after any existing parse actions
    :param call_during_try: if ``True``, parse actions on this expression are
                            also run during lookaheads and alternate testing
    :raises TypeError: if any of ``fns`` is not callable (checked before
                       anything is added, matching ``set_parse_action``)
    :returns: ``self``, to allow chaining
    """
    # the pre-PEP8 spelling of the keyword is still accepted
    callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

    # fail here rather than with an obscure TypeError at parse time
    if not all(callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")

    self.parseAction += [_trim_arity(fn) for fn in fns]
    self.callDuringTry = self.callDuringTry or callDuringTry or call_during_try
    return self

811 

def add_condition(
    self, *fns: ParseCondition, call_during_try: bool = False, **kwargs: Any
) -> ParserElement:
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception; when
      omitted (or ``None``), the default message of :func:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        >>> year_int = integer.copy().add_condition(
        ...     lambda toks: toks[0] >= 2000,
        ...     message="Only support years 2000 and later")
        >>> date_str = year_int + '/' + integer + '/' + integer

        >>> result = date_str.parse_string("1999/12/31")
        Traceback (most recent call last):
        ParseException: Only support years 2000 and later...
    """
    # the pre-PEP8 spelling of the keyword is still accepted
    callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

    # Only stringify a message that was actually given: str(None) would make
    # the failure message the literal text "None" instead of the default.
    raw_message = kwargs.get("message")
    message = None if raw_message is None else str(raw_message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry
    return self

855 

def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
    """
    Register the action to run when parsing fails at this expression.
    The fail action ``fn`` is a callable taking the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately."""
    setattr(self, "failAction", fn)
    return self

871 

872 def _skipIgnorables(self, instring: str, loc: int) -> int: 

873 if not self.ignoreExprs: 

874 return loc 

875 exprsFound = True 

876 ignore_expr_fns = [e._parse for e in self.ignoreExprs] 

877 last_loc = loc 

878 while exprsFound: 

879 exprsFound = False 

880 for ignore_fn in ignore_expr_fns: 

881 try: 

882 while 1: 

883 loc, dummy = ignore_fn(instring, loc) 

884 exprsFound = True 

885 except ParseException: 

886 pass 

887 # check if all ignore exprs matched but didn't actually advance the parse location 

888 if loc == last_loc: 

889 break 

890 last_loc = loc 

891 return loc 

892 

def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should start: ``loc`` moved past
    any ignorable expressions and then, when ``skipWhitespace`` is set, past
    any run of characters in ``whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    end = len(instring)
    skippable = self.whiteChars
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc

904 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """
    Base matching implementation: consume no input and produce no tokens.

    Returns ``(loc, [])`` with ``loc`` unchanged.
    """
    no_tokens: list = []
    return loc, no_tokens

907 

def postParse(self, instring, loc, tokenlist):
    """
    Hook applied to the tokens produced by ``parseImpl``.

    This base version hands ``tokenlist`` back untouched.
    """
    result = tokenlist
    return result

910 

911 # @profile 

def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Match this expression at ``loc`` without consulting the packrat cache.

    Steps, in order: optional ``preParse``, ``parseImpl``, ``postParse``,
    wrapping the tokens in a :class:`ParseResults`, then running the parse
    actions (only when ``do_actions`` or ``self.callDuringTry`` is set).

    Returns ``(loc, ret_tokens)`` where ``loc`` is the location returned by
    ``parseImpl``.

    An ``IndexError`` escaping ``parseImpl`` is converted to a
    :class:`ParseException` located at the end of ``instring``; an
    ``IndexError`` escaping a parse action is re-raised as a
    :class:`ParseException` chained to the original error.
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    # The debugging/failAction path and the plain path below repeat the same
    # preParse/parseImpl sequence; only this one wraps it in a handler that
    # reports failures to the debug and fail callbacks.
    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            # guard against IndexError only when the expression declares it
            # may raise one, or when already at/after the end of the input
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            # print("Exception raised:", err)
            # NOTE(review): if preParse itself raised, tokens_start has not
            # been assigned yet and the references below would fail with
            # UnboundLocalError - confirm whether preParse can raise here.
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, aslist=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        # same loop twice: the debugging variant additionally reports any
        # exception raised by a parse action to debug_fail
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action returning a new value replaces the results;
                    # returning None or the same object keeps them as they are
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            aslist=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a parse action returning a new value replaces the results;
                # returning None or the same object keeps them as they are
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        aslist=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            # reported as (start of tokens, end of match)
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

1012 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the resulting location.

    A :class:`ParseFatalException` is re-raised unchanged when
    ``raise_fatal`` is true; otherwise it is replaced by an ordinary
    :class:`ParseException` at ``loc`` carrying this expression's
    ``errmsg``.
    """
    try:
        matched = self._parse(instring, loc, do_actions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    else:
        return matched[0]

1027 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``.

    Returns ``False`` when ``try_parse`` raises :class:`ParseException` or
    ``IndexError``, ``True`` when it completes.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True

1035 

# cache for left-recursion in Forward references
# recursion_memos starts as a plain dict; enable_left_recursion() replaces it
# with a memo object, and reset_cache() empties it.
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}

1041 

class _CacheType(typing.Protocol):
    """
    Class to be used for packrat and left-recursion cacheing of results
    and exceptions.
    """

    # sentinel object: _parseCache treats a lookup as a miss when the value
    # returned by get() *is* this attribute (identity comparison)
    not_in_cache: bool

    # look up a cached entry; _parseCache calls this with a single key
    def get(self, *args) -> typing.Any: ...

    # store an entry; _parseCache calls this as set(key, value)
    def set(self, *args) -> None: ...

    # drop all entries; called from reset_cache()
    def clear(self) -> None: ...

1055 

class NullCache(dict):
    """
    A null cache type for initialization of the packrat_cache class variable.
    If/when enable_packrat() is called, this null cache will be replaced by a
    proper _CacheType class instance.
    """

    not_in_cache: bool = True

    # The three methods below override the dict versions with empty bodies,
    # so nothing is ever stored or returned.
    # NOTE(review): get() returns None, which is not ``not_in_cache``; this
    # looks safe only because _parseCache is installed together with a real
    # cache in enable_packrat() - confirm no other caller relies on it.
    def get(self, *args) -> typing.Any: ...

    def set(self, *args) -> None: ...

    def clear(self) -> None: ...

1070 

# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
packrat_cache: _CacheType = NullCache()
# held by _parseCache, reset_cache and the enable_*/disable_* methods
packrat_cache_lock = RLock()
# [hits, misses] - indexed by the HIT/MISS constants in _parseCache
packrat_cache_stats = [0, 0]

1076 

1077 # this method gets repeatedly called during backtracking with the same arguments - 

1078 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 

def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Packrat-memoized front end to :meth:`_parseNoCache`.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``.
    On a miss the expression is parsed and the outcome stored: a successful
    parse as ``(end_loc, copy_of_tokens, loc)``, a failure as a fresh copy of
    the exception built from its ``args`` (so no traceback is retained).
    On a hit the stored exception is re-raised, or a copy of the stored
    tokens is returned together with the stored end location.

    When ``self.debug`` is set, the debug actions are invoked on cache hits
    with ``cache_hit=True``; a ``TypeError`` from a debug action that does
    not accept that keyword is ignored.

    Returns ``(end_loc, tokens)``; raises whatever ``_parseNoCache`` raises.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # remember the requested start location next to the result so
                # a later cache hit can report it to debug_match
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(tuple[int, ParseResults, int], value)
            # stored layout is (end location, tokens, start location)
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # pass start before end, the same argument order that
                    # _parseNoCache uses for debug_match
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result

1126 

# default parse entry point; enable_packrat() rebinds this to _parseCache and
# disable_memoization() restores it
_parse = _parseNoCache

1128 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache and the left-recursion memo, and zero the
    packrat hit/miss counters in place.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        zeroed = [0] * len(ParserElement.packrat_cache_stats)
        # slice-assign so existing references to the stats list stay valid
        ParserElement.packrat_cache_stats[:] = zeroed
        ParserElement.recursion_memos.clear()

1140 

# class attributes to keep caching status
# (set by enable_packrat / enable_left_recursion, cleared by disable_memoization)
_packratEnabled = False
_left_recursion_enabled = False

1144 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing and discard their memoized
    data.

    Calling this when neither mode is active is harmless, so it can be used
    before enabling Packrat or Left Recursion to clear any earlier settings.
    """
    elem = ParserElement
    with elem.packrat_cache_lock:
        elem.reset_cache()
        elem._left_recursion_enabled = False
        elem._packratEnabled = False
        # go back to the uncached parse entry point
        elem._parse = elem._parseNoCache

1159 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, left-recursive
    :class:`Forward` elements are matched repeatedly with a fixed recursion
    depth that is increased step by step until the longest match is found.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    Because matches of ``Forward`` elements are memoized, parse actions may
    not be re-run during backtracking. Programs whose parse actions depend
    on a strict ordering of side-effects may therefore break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing is similar but not identical to Packrat
    parsing, so the two cannot be active together. Pass ``force=True`` to
    disable any previous, conflicting settings.

    Raises ``RuntimeError`` if Packrat is already enabled and ``force`` is
    not set, and ``NotImplementedError`` for a non-positive
    ``cache_size_limit``.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True

1217 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which memoizes parse attempts. When the same
    expression is tried again at the same string location (common in
    complex grammars), the cached outcome is returned instead of re-running
    the parsing/validating code. Both successful results and parsing
    exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is
    first imported. To activate it, call the class method
    :class:`ParserElement.enable_packrat`, ideally immediately after
    importing pyparsing.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar but not identical to Bounded Recursion
    parsing, so the two cannot be active together. Pass ``force=True`` to
    disable any previous, conflicting settings.

    Raises ``RuntimeError`` if Bounded Recursion is already enabled and
    ``force`` is not set. Does nothing if packrat is already enabled.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if ParserElement._packratEnabled:
            return

        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        # route all parsing through the memoizing entry point
        ParserElement._parse = ParserElement._parseCache

1269 

def parse_string(
    self, instring: str, parse_all: bool = False, **kwargs
) -> ParseResults:
    """
    Parse a string according to this parser definition. This is the primary
    entry point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the whole input matches the grammar, set ``parse_all`` to ``True``; this is equivalent to
    ending the grammar with :class:`StringEnd`\\ ().

    So that column numbers are reported properly, ``parse_string`` works on a copy of the input in which tabs
    have been converted to spaces (8 spaces per tab, the ``string.expandtabs`` default). If the input contains
    tabs and your parse actions use ``loc`` to index into the string being parsed, keep a consistent view of
    the input by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    Parsing behavior depends on the concrete subclass of this abstract class; see the subclasses for more
    examples.

    An exception is raised if ``parse_all`` is set and ``instring`` does not match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    parse_all = parse_all or legacy_parse_all

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if parse_all:
            # skip trailing whitespace/ignorables, then demand end of input
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)

    return tokens

1345 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    **kwargs,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    If ``always_skip_whitespace`` is true (the default), positions between
    matches are pre-parsed with a temporary :class:`Empty` expression that
    shares this expression's ignore expressions and whitespace characters;
    otherwise this expression's own ``preParse`` is used. If ``debug`` is
    true, each match is printed as a dict with ``tokens``, ``start`` and
    ``end`` keys before it is yielded.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)

    # honour whichever of the new and the deprecated limit is smaller
    max_matches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < max_matches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                # pre-parsing was already done above, so tell _parse to skip it
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here - retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): ``nextloc`` (pre-parsed position) is
                        # only used for the comparison; the assignment uses
                        # ``nextLoc`` (end of the match). The two names differ
                        # only by case - confirm this is intended.
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc - step forward to avoid
                    # trying the same position again
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1449 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matched text using the
    tokens returned from a parse action. Define a grammar, attach a parse
    action that modifies the returned token list, and call
    ``transform_string()`` on a target string: every match is replaced by
    its (possibly modified) tokens and the unmatched text in between is
    kept as is. The transformed string is returned.

    Note that this sets ``keepTabs`` on the expression so tabs in the input
    are preserved.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
        made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
        Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy through the unmatched text preceding this match
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.as_list())
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)

        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(non_empty))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1508 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens
    of every match of this expression into one :class:`ParseResults`.
    The optional ``max_matches`` argument stops the search after 'n'
    matches are found.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    legacy_limit: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    max_matches = min(legacy_limit, max_matches)

    try:
        found = self.scan_string(
            instring,
            max_matches=max_matches,
            always_skip_whitespace=False,
            debug=debug,
        )
        return ParseResults([tokens for tokens, _start, _end in found])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1567 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, treating matches of this expression as
    the separators. The optional ``maxsplit`` argument limits the number of
    splits; the optional ``include_separators`` argument (default=
    ``False``) controls whether the matched separator text is yielded
    between the pieces.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    legacy_flag: bool = deprecate_argument(kwargs, "includeSeparators", False)
    include_separators = legacy_flag or include_separators

    cursor = 0
    for sep_tokens, sep_start, sep_end in self.scan_string(
        instring, max_matches=maxsplit
    ):
        yield instring[cursor:sep_start]
        if include_separators:
            yield sep_tokens[0]
        cursor = sep_end
    # whatever follows the last separator
    yield instring[cursor:]

1605 

def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. Strings added
    to a :class:`ParserElement` are converted to :class:`Literal`\\ s by
    default.

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    The skipped text is returned under the results name '_skipped'; to
    allow several skips in one parser, the value is a list of all skipped
    text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return And([self, operand])
    return NotImplemented

1649 

def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` becomes a :class:`SkipTo` up to ``expr`` (named
    ``"_skipped*"``) followed by ``expr``; a string left operand is first
    converted to a literal expression.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left + self
    return NotImplemented

1662 

def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string operand is first converted to a literal expression.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if not isinstance(rhs, ParserElement):
        return NotImplemented
    return self + And._ErrorStop() + rhs

1672 

def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is first converted to a literal expression.
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left - self
    return NotImplemented

1682 

def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``ValueError`` for a negative count or when the second tuple
    value is smaller than the first.
    """
    # normalize the Ellipsis spellings into tuple form first
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        required, optional = other, 0
    else:
        bounds = tuple(None if bound is Ellipsis else bound for bound in other)
        low, high = (bounds + (None, None))[:2]
        if low is None:
            low = 0

        if isinstance(low, int) and high is None:
            # open-ended upper bound
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)

        if not (isinstance(low, int) and isinstance(high, int)):
            return NotImplemented
        required, optional = low, high - low

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    # mandatory leading part (None when nothing is mandatory)
    if required == 1:
        head = self
    elif required:
        head = And([self] * required)
    else:
        head = None

    if not optional:
        return head

    def nested_optionals(count):
        # Opt(self + Opt(self + ...)), ``count`` levels deep
        if count > 1:
            return Opt(self + nested_optionals(count - 1))
        return Opt(self)

    tail = nested_optionals(optional)
    return tail if head is None else head + tail

1762 

def __rmul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator when the multiplier is the left
    operand; delegates to ``__mul__``.
    """
    product = self.__mul__(other)
    return product

1765 

def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    ``expr | ...`` yields a pending skip that must skip; a non-empty string
    operand is converted to a literal expression.

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented

1784 

def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is first converted to a literal expression.
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left | self
    return NotImplemented

1794 

def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`.

    A string operand is first converted to a literal expression.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1804 

def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    A string left operand is first converted to a literal expression.
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left ^ self
    return NotImplemented

1814 

def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`

    A string operand is wrapped with the configured literal class first;
    anything else that is not a parser element yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1824 

def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`

    A string left operand is wrapped with the configured literal class
    first; anything else that is not a parser element yields
    ``NotImplemented``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1834 

def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny`
    wrapping this expression.
    """
    negated = NotAny(self)
    return negated

1840 

# Disable __iter__ explicitly. This class defines __getitem__ (used for the
# ``expr[...]`` repetition notation), and setting __iter__ to None overrides
# the legacy behavior of iterating a sequence through sequential access to
# __getitem__.
__iter__ = None

1844 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` when more than two index arguments are given.

    .. versionchanged:: 3.1.0
       Support for slice notation.
    """

    has_stop_on = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[count : end_expr] - a missing count means "any number"
        stop_on = key.stop
        key = ... if key.start is None else key.start
        has_stop_on = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max : end_expr] - the slice carries max and the stop expression
        tail = key[1]
        key, stop_on = (key[0], tail.start), tail.stop
        has_stop_on = True

    # normalize a single argument into a tuple
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    count = len(key)
    if count > 2:
        overflow = f"... [{count}]" if count > 5 else ""
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{overflow})"
        )

    # at most 2 elements are passed on to the ``*`` operator
    repeated = self * tuple(key[:2])
    repeated = typing.cast(_MultipleMatch, repeated)

    if has_stop_on:
        repeated.stopOn(stop_on)

    return repeated

1906 

1907 def __call__(self, name: typing.Optional[str] = None) -> ParserElement: 

1908 """ 

1909 Shortcut for :class:`set_results_name`, with ``list_all_matches=False``. 

1910 

1911 If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be 

1912 passed as ``True``. 

1913 

1914 If ``name`` is omitted, same as calling :class:`copy`. 

1915 

1916 Example: 

1917 

1918 .. testcode:: 

1919 

1920 # these are equivalent 

1921 userdata = ( 

1922 Word(alphas).set_results_name("name") 

1923 + Word(nums + "-").set_results_name("socsecno") 

1924 ) 

1925 

1926 userdata = Word(alphas)("name") + Word(nums + "-")("socsecno") 

1927 """ 

1928 if name is not None: 

1929 return self._setResultsName(name) 

1930 

1931 return self.copy() 

1932 

def suppress(self) -> ParserElement:
    """
    Return this expression wrapped in :class:`Suppress`, so that its matched
    tokens are left out of the results; useful to keep punctuation from
    cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed

1939 

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any).
       This implementation sets only the element's own flag and does not read ``recursive``.
    :returns: ``self``, to allow call chaining
    """
    self.skipWhitespace = True
    return self

1949 

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any).
       This implementation sets only the element's own flag and does not read ``recursive``.
    :returns: ``self``, to allow call chaining
    """
    self.skipWhitespace = False
    return self

1960 

def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars for this expression.

    Enables whitespace skipping, stores ``chars`` as a set in ``whiteChars``
    and records ``copy_defaults`` in ``copyDefaultWhiteChars``.

    :param chars: the characters to treat as whitespace
    :param copy_defaults: value stored in ``copyDefaultWhiteChars``
    :returns: ``self``, to allow call chaining
    """
    self.skipWhitespace = True
    self.whiteChars = set(chars)
    self.copyDefaultWhiteChars = copy_defaults
    return self

1971 

def parse_with_tabs(self) -> ParserElement:
    """
    Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    Sets the ``keepTabs`` flag on this expression and returns ``self``.
    """
    self.keepTabs = True
    return self

1980 

def ignore(self, other: ParserElement) -> ParserElement:
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in :class:`Suppress`.  An expression that is
    already a :class:`Suppress` is added only if it is not in the ignore list
    yet; any other expression is copied and wrapped in :class:`Suppress`.

    Example:

    .. doctest::

       >>> patt = Word(alphas)[...]
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj']

       >>> patt = Word(alphas)[...].ignore(c_style_comment)
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj', 'lskjd']
    """
    ignorable = Suppress(other) if isinstance(other, str_type) else other

    if not isinstance(ignorable, Suppress):
        self.ignoreExprs.append(Suppress(ignorable.copy()))
    elif ignorable not in self.ignoreExprs:
        self.ignoreExprs.append(ignorable)
    return self

2008 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching.
    Any action passed as a false value is replaced by the corresponding
    default debug action.

    :param start_action: method to be called when an expression is about to be parsed;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
       should have the signature::

          fn(input_string: str,
             start_location: int,
             end_location: int,
             expression: ParserElement,
             parsed_tokens: ParseResults,
             cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             exception: Exception,
             cache_hit: bool)

    :returns: ``self``, to allow call chaining
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]

    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = any(self.debugActions)
    return self

2052 

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions.

    Example:

    .. testcode::

       wd = Word(alphas).set_name("alphaword")
       integer = Word(nums).set_name("numword")
       term = wd | integer

       # turn on debugging for wd
       wd.set_debug()

       term[1, ...].parse_string("abc 123 xyz 890")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE

       Match alphaword at loc 0(1,1)
         abc 123 xyz 890
         ^
       Matched alphaword -> ['abc']
       Match alphaword at loc 4(1,5)
         abc 123 xyz 890
             ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
       Match alphaword at loc 8(1,9)
         abc 123 xyz 890
                 ^
       Matched alphaword -> ['xyz']
       Match alphaword at loc 12(1,13)
         abc 123 xyz 890
                     ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
         abc 123 xyz 890
                        ^
       Match alphaword failed, ParseException raised: Expected alphaword, found end of text ...

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :meth:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :meth:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # apply the flag to each expression yielded by visit_all(), one at a time
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
    elif not flag:
        self.debug = False
    else:
        self.set_debug_actions(
            _default_start_debug_action,
            _default_success_debug_action,
            _default_exception_debug_action,
        )
    return self

2122 

@property
def default_name(self) -> str:
    """
    The auto-generated name for this expression.  It is produced by
    ``_generateDefaultName()`` on first access and cached in ``_defaultName``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

2128 

2129 @abstractmethod 

2130 def _generateDefaultName(self) -> str: 

2131 """ 

2132 Child classes must define this method, which defines how the ``default_name`` is set. 

2133 """ 

2134 

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Define name for this expression, makes debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example:

    .. doctest::

       >>> integer = Word(nums)
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

       >>> integer.set_name("integer")
       integer
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # rebuild the error message only after the new name is in place
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        named = name is not None
        self.set_debug(named)

    return self

2169 

@property
def name(self) -> str:
    """
    Returns a user-defined name if available, but otherwise defaults back to the auto-generated name
    """
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

@name.setter
def name(self, new_name) -> None:
    # assigning to ``name`` goes through set_name()
    self.set_name(new_name)

2180 

2181 def __str__(self) -> str: 

2182 return self.name 

2183 

2184 def __repr__(self) -> str: 

2185 return str(self) 

2186 

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default
    name, so it is regenerated on next access.  Returns ``self``.
    """
    self.streamlined = True
    self._defaultName = None
    return self

2191 

def recurse(self) -> list[ParserElement]:
    """Return the expressions to recurse into; this implementation returns none."""
    children: list[ParserElement] = []
    return children

2194 

2195 def _checkRecursion(self, parseElementList): 

2196 subRecCheckList = parseElementList[:] + [self] 

2197 for e in self.recurse(): 

2198 e._checkRecursion(subRecCheckList) 

2199 

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Always issues a ``PyparsingDeprecationWarning`` before running the
    recursion check; ``validateTrace`` is accepted but not used.
    """
    deprecation_msg = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(deprecation_msg, PyparsingDeprecationWarning, stacklevel=2)
    self._checkRecursion([])

2214 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    :param file_or_filename: an object with a ``read()`` method, or a path to open
    :param encoding: encoding used when a path is opened (not applied to file objects)
    :param parse_all: passed through to :meth:`parse_string`; the legacy ``parseAll``
       keyword is accepted as well, and either one being true enables it
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    parse_all = parse_all or legacy_parse_all

    try:
        # first assume a file-like object
        stream = typing.cast(TextIO, file_or_filename)
        text = stream.read()
    except AttributeError:
        # no usable read(): treat the argument as a path
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as source:
            text = source.read()

    try:
        return self.parse_string(text, parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

2245 

2246 def __eq__(self, other): 

2247 if self is other: 

2248 return True 

2249 elif isinstance(other, str_type): 

2250 return self.matches(other, parse_all=True) 

2251 elif isinstance(other, ParserElement): 

2252 return vars(self) == vars(other) 

2253 return False 

2254 

2255 def __hash__(self): 

2256 return id(self) 

2257 

def matches(self, test_string: str, parse_all: bool = True, **kwargs) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match
       (converted with ``str()`` before parsing)
    :param parse_all: flag to pass to :meth:`parse_string` when running tests;
       the legacy ``parseAll`` keyword is accepted as well, and either one
       being false disables it
    :returns: ``True`` if parsing succeeds, ``False`` if a parse exception is raised

    Example:

    .. doctest::

       >>> expr = Word(nums)
       >>> expr.matches("100")
       True
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", True)
    parse_all = parse_all and legacy_parse_all

    try:
        self.parse_string(str(test_string), parse_all=parse_all)
    except ParseBaseException:
        return False
    else:
        return True

2282 

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The keyword-only ``parseAll``, ``fullDump``, ``printResults``, ``failureTests`` and
    ``postParse`` arguments are the legacy spellings of the arguments above; each one is
    merged with its snake_case counterpart before the tests are run.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Passing example:

    .. testcode::

       number_expr = pyparsing_common.number.copy()

       result = number_expr.run_tests('''
           # unsigned integer
           100
           # negative integer
           -100
           # float with scientific notation
           6.02e23
           # integer with scientific notation
           1e-12
           # negative decimal number without leading digit
           -.100
           ''')
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # unsigned integer
       100
       [100]

       # negative integer
       -100
       [-100]

       # float with scientific notation
       6.02e23
       [6.02e+23]

       # integer with scientific notation
       1e-12
       [1e-12]

       # negative decimal number without leading digit
       -.100
       [-0.1]
       Success

    Failure-test example:

    .. testcode::

       result = number_expr.run_tests('''
           # stray character
           100Z
           # too many '.'
           3.14.159
           ''', failure_tests=True)
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # stray character
       100Z
       100Z
          ^
       ParseException: Expected end of text, found 'Z' ...

       # too many '.'
       3.14.159
       3.14.159
           ^
       ParseException: Expected end of text, found '.' ...
       FAIL: Expected end of text, found '.' ...
       Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

       expr = Word(alphanums)[1,...]
       expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE
       :hide:


       this is a test\\n of strings that spans \\n 3 lines
       ['this', 'is', 'a', 'test', 'of', 'strings', 'that', 'spans', '3', 'lines']

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each legacy camelCase keyword with its snake_case counterpart
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines (and blank lines that follow them) are collected and
        # shown as a header for the next real test line
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            # use the merged flag so that the legacy ``parseAll`` keyword is honored
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame after the synthetic call line is the parse action;
                    # tolerate a traceback that ends here instead of leaking
                    # StopIteration out of run_tests
                    next_entry = next(it, None)
                    if next_entry is not None:
                        next_f = next_entry[0]
                        tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # callback added nothing: dump the results the same way
                        # as when no callback is given
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2524 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create the file and write the HTML into it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2586 

# Compatibility synonyms
# Legacy camelCase names kept alongside the snake_case methods. Each one is
# produced by ``replaced_by_pep8(<legacy name>, <snake_case method>)``, except
# ``defaultName``, which is bound directly to the ``default_name`` property.
# fmt: off
# aliases additionally wrapped in ``staticmethod``
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# aliases for instance methods
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# plain alias of the property, not wrapped by replaced_by_pep8
defaultName = default_name
# fmt: on

2621 

2622 

class _PendingSkip(ParserElement):
    # Internal placeholder that holds the place where '...' was added to a
    # parser element.  Once another ParserElement is added to it, the
    # placeholder is replaced by an expression built around a SkipTo.
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # name of "anchor + Empty()" with the Empty shown as "..."
        combined = str(self.anchor + Empty())
        return combined.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched: drop the skipped entry when nothing was skipped
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match: report it as missing when nothing was skipped
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never completed cannot be parsed
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2662 

2663 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.

    The default name of a token is the name of its class.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        token_class = type(self)
        return token_class.__name__

2674 

2675 

class NoMatch(Token):
    """
    A token that will never match.

    Parsing it always raises :class:`ParseException` with the message
    ``"Unmatchable token"``.
    """

    def __init__(self) -> None:
        super().__init__()
        self._may_return_empty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2689 

2690 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example:

    .. doctest::

       >>> Literal('abc').parse_string('abc')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('abcdef')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('ab')
       Traceback (most recent call last):
       ParseException: Expected 'abc', found 'ab' (at char 0), (line: 1, col: 1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", **kwargs):
        # Subclasses are always created as themselves
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        text = legacy_match or match_string
        if not text:
            target = Empty
        elif len(text) == 1:
            target = _SingleCharLiteral
        else:
            target = cls
        return super().__new__(target)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", **kwargs) -> None:
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")

        super().__init__()
        text = legacy_match or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        # errmsg is built from self.name, so self.match has to be set first
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before doing the full comparison
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2752 

2753 

class Empty(Literal):
    """
    A token that matches at any location without consuming input.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # Both arguments are accepted but ignored; the literal text is
        # always the empty string.
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        """Return the fixed default name ``"Empty"``."""
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed at ``loc`` with an empty token list."""
        no_tokens: list = []
        return loc, no_tokens

2769 

2770 

class _SingleCharLiteral(Literal):
    """:class:`Literal` variant selected by ``Literal.__new__`` for
    one-character match strings; it compares a single character only.
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the single character at ``loc`` or raise :class:`ParseException`."""
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2776 

2777 

# Store Literal on ParserElement as its ``_literalStringClass``.
# NOTE(review): presumably the class used to wrap bare strings that appear
# in expressions - confirm against ParserElement.
2778ParserElement._literalStringClass = Literal 

2779 

2780 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("start")
       ParseResults(['start'], {})
       >>> Keyword("start").parse_string("starting")
       Traceback (most recent call last):
       ParseException: Expected Keyword 'start', keyword was immediately
       followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("starting").debug()
       Traceback (most recent call last):
       ParseException: Expected Keyword "start", keyword was immediately
       followed by keyword character, found 'ing' ...

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        **kwargs,
    ) -> None:
        """Create a keyword matcher.

        :param match_string: the keyword text; must not be empty.
        :param ident_chars: characters treated as keyword characters;
            ``None`` selects :attr:`DEFAULT_KEYWORD_CHARS`.
        :param caseless: compare upper-cased text instead of exact text.
        :param kwargs: accepts the deprecated ``matchString`` and
            ``identChars`` spellings, which take precedence when truthy.
        :raises ValueError: if the resulting match string is empty.
        """
        matchString = deprecate_argument(kwargs, "matchString", "")
        identChars = deprecate_argument(kwargs, "identChars", None)

        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        # self.match must be set before self.name is read, because the
        # default name is derived from it.
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text, so the
            # keyword and the keyword characters are upper-cased once here
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.ident_chars = set(identChars)

    @property
    def identChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use ident_chars instead.

        Property returning the characters being used as keyword characters for this expression.
        """
        return self.ident_chars

    def _generateDefaultName(self) -> str:
        """Return the ``repr`` of the keyword string as the default name."""
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the keyword at ``loc``, enforcing keyword boundaries.

        Returns ``(loc + matchLen, match)`` on success. Raises
        :class:`ParseException` if the text does not match, or if the
        match is immediately preceded or followed by a keyword character;
        in the latter cases the error message and location point at the
        offending neighbour.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        # read the set directly rather than via the deprecated property
        ident_chars = self.ident_chars
        match_len = self.matchLen

        if self.caseless:
            if instring[loc : loc + match_len].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in ident_chars:
                    if (
                        loc >= len(instring) - match_len
                        or instring[loc + match_len].upper() not in ident_chars
                    ):
                        return loc + match_len, self.match

                    # followed by keyword char
                    # (message now matches the case-sensitive branch below)
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + match_len
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and match_len == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in ident_chars:
                if (
                    loc >= len(instring) - match_len
                    or instring[loc + match_len] not in ident_chars
                ):
                    return loc + match_len, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + match_len
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2922 

2923 

class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the token returned is always spelled as the string given to
    the constructor, NOT as it appeared in the input text.

    Example:

    .. doctest::

       >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", **kwargs) -> None:
        legacy_text: str = deprecate_argument(kwargs, "matchString", "")

        defining_text = legacy_text or match_string
        # The base Literal stores the upper-cased form for comparison.
        super().__init__(defining_text.upper())
        # Keep the caller's spelling to return as the matched token.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the upper-cased input slice at ``loc`` with the stored text."""
        candidate = instring[loc : loc + self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString

2953 

2954 

class CaselessKeyword(Keyword):
    """
    :class:`Keyword` that always matches case-insensitively.

    Example:

    .. doctest::

       >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self, match_string: str = "", ident_chars: typing.Optional[str] = None, **kwargs
    ) -> None:
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        legacy_ident: typing.Optional[str] = deprecate_argument(
            kwargs, "identChars", None
        )

        # A truthy deprecated spelling wins over the PEP8-style argument.
        keyword_text = legacy_match or match_string
        keyword_chars = legacy_ident or ident_chars
        super().__init__(keyword_text, keyword_chars, caseless=True)

2980 

2981 

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> patt = CloseMatch("ATCATCGAATGGA")
       >>> patt.parse_string("ATCATCGAAXGGA")
       ParseResults(['ATCATCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

       >>> patt.parse_string("ATCAXCGAAXGGA")
       Traceback (most recent call last):
       ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
       found 'ATCAXCGAAXGGA' (at char 0), (line:1, col:1)

       # exact match
       >>> patt.parse_string("ATCATCGAATGGA")
       ParseResults(['ATCATCGAATGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': []})

       # close match allowing up to 2 mismatches
       >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
       >>> patt.parse_string("ATCAXCGAAXGGA")
       ParseResults(['ATCAXCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        caseless=False,
        **kwargs,
    ) -> None:
        """Create a close-match expression.

        :param match_string: the reference string to compare input against.
        :param max_mismatches: number of differing positions tolerated;
            ``None`` falls back to the deprecated ``maxMismatches``
            keyword, whose default is 1.
        :param caseless: compare lower-cased characters when true.
        """
        maxMismatches: int = deprecate_argument(kwargs, "maxMismatches", 1)

        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        """Return ``"<ClassName>:<repr of match_string>"`` as the default name."""
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the input at ``loc`` with ``match_string`` position by position.

        Returns ``(end_loc, results)`` where ``results`` holds the matched
        input slice plus the named results ``original`` and ``mismatches``.
        Raises :class:`ParseException` if the input is too short or the
        number of mismatches exceeds ``maxMismatches``.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # only attempt a comparison if enough input remains
        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches
            caseless = self.caseless

            for position, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(position)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The loop consumed the whole match string, so the match
                # ends exactly at maxloc. Using maxloc (rather than the
                # last loop index + 1) also keeps an empty match_string
                # from consuming a character it never compared.
                results = ParseResults([instring[start:maxloc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return maxloc, results

        raise ParseException(instring, loc, self.errmsg, self)

3082 

3083 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

       # a word composed of digits
       integer = Word(nums)
       # Two equivalent alternate forms:
       Word("0123456789")
       Word(srange("[0-9]"))

       # a word with a leading capital, and zero or more lowercase
       capitalized_word = Word(alphas.upper(), alphas.lower())

       # hostnames are alphanumeric, with leading alpha, and '-'
       hostname = Word(alphas, alphanums + '-')

       # roman numeral
       # (not a strict parser, accepts invalid mix of characters)
       roman = Word("IVXLCDM")

       # any string of non-whitespace characters, except for ','
       csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
                        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
       Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        initChars: typing.Optional[str] = deprecate_argument(kwargs, "initChars", None)
        bodyChars: typing.Optional[str] = deprecate_argument(kwargs, "bodyChars", None)
        asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        excludeChars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        # a truthy deprecated spelling wins over the PEP8-style argument
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.init_chars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no (remaining) body characters: body shares the initial set
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # recorded before ``exact`` may overwrite ``max`` below
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # Use the stored sets directly; the deprecated ``initChars``
        # property would build a new set on every access.
        init_set = self.init_chars
        body_set = self.bodyChars

        # see if we can make a regex for this Word
        if " " not in (init_set | body_set):
            if len(init_set) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(init_set)}]"

            if body_set == init_set:
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(body_set)}]"
                    # the leading fragment already accounts for one
                    # character, hence the "- 1" on the body repeat counts
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    @property
    def initChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use `init_chars` instead.

        Property returning the initial chars to be used when matching this
        Word expression. If no body chars were specified, the initial characters
        will also be the body characters.
        """
        return set(self.init_chars)

    def copy(self) -> Word:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            # rebind the regex fast path to the copy rather than to self
            ret.re_match = self.re_match
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        """Build a default name of the form ``W:(init[, body])`` plus an
        optional ``{min,max}`` length suffix."""

        def charsAsStr(s):
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        init_set = self.init_chars
        body_set = self.bodyChars

        if init_set != body_set:
            base = f"W:({charsAsStr(init_set)}, {charsAsStr(body_set)})"
        else:
            base = f"W:({charsAsStr(init_set)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single-character word: drop the leading "W:"
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Character-by-character matcher, used when no regex was compiled.

        Returns ``(end_loc, matched_text)`` or raises
        :class:`ParseException`.
        """
        # membership test on the stored set; the deprecated ``initChars``
        # property would copy the set on every call
        if instring[loc] not in self.init_chars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            # too short
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # an explicit max was given and more body characters follow
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            # keyword mode: neighbours must not be body characters
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed by ``__init__`` when compilation succeeds."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result[0]

3372 

3373 

class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)``: matches
    exactly one character taken from the given string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        legacy_keyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        legacy_exclude: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        # A truthy deprecated spelling wins over the PEP8-style argument.
        keyword_mode = legacy_keyword or as_keyword
        excluded = legacy_exclude or exclude_chars
        super().__init__(
            charset, exact=1, as_keyword=keyword_mode, exclude_chars=excluded
        )

3397 

3398 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ module for an
    explanation of the acceptable patterns and flags.

    Example:

    .. testcode::

       realnum = Regex(r"[+-]?\d+\.\d*")
       # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
       roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

       # named fields in a regex will be returned as named results
       date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

       # the Regex class will accept regular expressions compiled using the
       # re module
       import re
       parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        **kwargs,
    ) -> None:
        """Create a regex-backed token.

        :param pattern: a non-empty pattern string, an object exposing
            ``pattern`` and ``match`` attributes (a compiled RE), or a
            zero-argument callable returning either of those.
        :param flags: flags handed to ``re.compile()`` when a string
            pattern is compiled.
        :param as_group_list: return the match's ``groups()`` as the result.
        :param as_match: return the match object itself as the result
            (applied after, and so overriding, ``as_group_list``).
        :raises ValueError: if ``pattern`` is an empty string.
        :raises TypeError: if ``pattern`` is none of the accepted kinds.
        """
        super().__init__()
        asGroupList: bool = deprecate_argument(kwargs, "asGroupList", False)
        asMatch: bool = deprecate_argument(kwargs, "asMatch", False)

        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is read
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self) -> Regex:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Regex = cast(Regex, super().copy())
        # rebind the alternate parseImpl variants to the copy
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch  # type: ignore [method-assign]
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """
        Property returning the compiled regular expression for this Regex.

        Resolving the pattern also fills in the "may return empty" flag
        when it has not been determined yet, including for patterns that
        were supplied already compiled.

        Generally only used internally by pyparsing.

        :raises ValueError: if a string pattern fails to compile.
        """
        if not self._re:
            if callable(self.pattern):
                # replace self.pattern with the string returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, use it as-is
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
            else:
                try:
                    self._re = re.compile(self.pattern, self.flags)
                except re.error as err:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from err
                # a freshly compiled string pattern always (re)sets the flag
                self._may_return_empty = self._re.match("", 0) is not None

        if self._may_return_empty is None:
            # Pattern arrived pre-compiled (directly or from a callable):
            # the flag has not been computed yet, so do it here. Arguments
            # are passed positionally, the same way re_match is called.
            self._may_return_empty = self._re.match("", 0) is not None

        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """The compiled pattern's bound ``match`` method."""
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether the pattern can match the empty string."""
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        """Return ``Re:(<pattern repr>)`` with doubled backslashes collapsed."""
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc``; return the matched text plus any named groups."""
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result[0])
        d = result.groupdict()

        # expose named groups as named results
        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Match at ``loc``; return the match's ``groups()`` tuple."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Match at ``loc``; return the match object itself."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example:

        .. testcode::

           make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
           print(make_html.transform_string("h1:main title:"))

        .. testoutput::

           <h1>main title</h1>

        :raises TypeError: if this Regex was built with ``as_group_list=True``,
            or with ``as_match=True`` and ``repl`` is a callable.
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            # tokens[0] is the match object; expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # tokens[0] is the matched text; re-apply the pattern to it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3622 

3623 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
       ``unquote_results`` is ``False``.

    Raises ``ValueError`` if ``quote_char`` or ``end_quote_char`` is empty
    after stripping whitespace, or if the generated pattern fails to compile.

    Example:

    .. doctest::

       >>> qs = QuotedString('"')
       >>> print(qs.search_string('lsjdf "This is the quote" sldjf'))
       [['This is the quote']]
       >>> complex_qs = QuotedString('{{', end_quote_char='}}')
       >>> print(complex_qs.search_string(
       ...     'lsjdf {{This is the "quote"}} sldjf'))
       [['This is the "quote"']]
       >>> sql_qs = QuotedString('"', esc_quote='""')
       >>> print(sql_qs.search_string(
       ...     'lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
       [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace spellings and the characters they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        **kwargs,
    ) -> None:
        super().__init__()
        # pre-PEP8 keyword spellings, pulled out of kwargs
        quoteChar: str = deprecate_argument(kwargs, "quoteChar", "")
        escChar: str = deprecate_argument(kwargs, "escChar", None)
        escQuote: str = deprecate_argument(kwargs, "escQuote", None)
        unquoteResults: bool = deprecate_argument(kwargs, "unquoteResults", True)
        endQuoteChar: typing.Optional[str] = deprecate_argument(
            kwargs, "endQuoteChar", None
        )
        convertWhitespaceEscapes: bool = deprecate_argument(
            kwargs, "convertWhitespaceEscapes", True
        )

        # merge legacy and current spellings; for the boolean flags a False
        # from either spelling wins
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of a multi-character end quote, as long
            # as it is not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally exclude line breaks
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                # The repetition counts are written with doubled braces:
                # inside an f-string a bare ``{3}`` is a replacement field
                # that renders as the digit "3", which would silently drop
                # the regex quantifier.
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    rf"|(\\[0-7]{{3}}|\\0|\\x[0-9a-fA-F]{{2}}|\\u[0-9a-fA-F]{{4}})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        """Describe the expression in terms of its quote delimiters."""
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``.

        Returns ``(new_loc, text)``. When ``unquote_results`` is set the
        delimiters are stripped, escape sequences are resolved and any
        ``esc_quote`` sequence is replaced by the end quote; otherwise the
        matched text is returned verbatim.

        Raises ``ParseException`` if no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            self.re_match(instring, loc)
            if instring[loc] == self.first_quote_char
            else None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result[0]

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[g] if (g := match[1])
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else _convert_escaped_numerics_to_char(g[1:]) if (g := match[2])
                        # match group 3 matches escaped characters
                        else g[-1] if (g := match[3])
                        # match group 4 matches any character
                        else match[4]
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        g[-1] if (g := match[1])
                        # match group 2 matches any character
                        else match[2]
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3857 

3858 

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.

    Whitespace is matched like any other character unless it is listed in
    the exclusion set (see example). Construct with a string holding every
    disallowed character, plus optional ``min``, ``max`` and/or ``exact``
    lengths. ``min`` defaults to 1 and a value < 1 raises ``ValueError``;
    ``max`` and ``exact`` default to 0, meaning no maximum or exact length
    restriction.

    Example:

    .. testcode::

       # define a comma-separated-value as anything that is not a ','
       csv_value = CharsNotIn(',')
       print(
           DelimitedList(csv_value).parse_string(
               "dkls,lsdkjf,s12 34,@!#,213"
           )
       )

    prints:

    .. testoutput::

       ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self, not_chars: str = "", min: int = 1, max: int = 0, exact: int = 0, **kwargs
    ) -> None:
        super().__init__()
        # pre-PEP8 spelling of ``not_chars``, used only if the new one is empty
        legacy_not_chars: str = deprecate_argument(kwargs, "notChars", "")

        self.skipWhitespace = False
        self.notChars = not_chars or legacy_not_chars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Build the ``!W:(...)`` display name, truncating long exclusion sets."""
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume characters from ``loc`` until an excluded one or ``maxLen``.

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` if the
        first character is excluded or fewer than ``minLen`` were consumed.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        loc = start + 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3941 

3942 

class White(Token):
    r"""Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \t\r\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display labels used when building the default name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00a0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180e": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200a": "<HAIR_SPACE>",
        "\u200b": "<ZERO_WIDTH_SPACE>",
        "\u202f": "<NNBSP>",
        "\u205f": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws
        # skip every known whitespace character except those being matched
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        # self.leave_whitespace()
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Concatenate the display label of every character in ``matchWhite``.

        Characters missing from ``whiteStrs`` (for example ``"\\v"``) get a
        ``<U+XXXX>`` label instead of raising ``KeyError``; ``__init__``
        reads the name, so a direct lookup would make construction fail.
        """
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a run of ``matchWhite`` characters starting at ``loc``.

        Returns ``(new_loc, matched_text)``; raises ``ParseException`` if the
        first character is not in ``matchWhite`` or fewer than ``minLen``
        characters were consumed.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

4020 

4021 

class PositionToken(Token):
    """Common base for tokens such as :class:`LineStart`, :class:`StringEnd`
    and :class:`WordStart`; marks instances as possibly matching empty and
    as not raising ``IndexError``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

4027 

4028 

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; useful for
    scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        # target column, compared against the result of col()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorables and whitespace until the target column is reached."""
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        while loc < end:
            # stop at the first non-space character or at the target column
            if not instring[loc].isspace() or col(loc, instring) == target:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column.

        Raises ``ParseException`` if ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        newloc = loc + self.col - current
        return newloc, instring[loc:newloc]

4061 

4062 

class LineStart(PositionToken):
    r"""Matches if the current position is at the logical beginning of a
    line (after skipping whitespace) within the parse string.

    Example:

    .. testcode::

       test = '''\
       AAA this line
       AAA and this line
         AAA and even this line
       B AAA but definitely not this line
       '''

       for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
           print(t)

    prints:

    .. testoutput::

       ['AAA', ' this line']
       ['AAA', ' and this line']
       ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set before "\n" is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Skip leading whitespace, stepping over newlines when they were
        part of the original whitespace set."""
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" in self.orig_whiteChars:
            # the one-character slice is safe at end of string
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4115 

4116 

class LineEnd(PositionToken):
    """Matches if the current position is at the end of a line within the
    parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        # newlines must not be skipped as whitespace, they are what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc`` or the end of the string.

        Returns ``(loc + 1, "\\n")`` for a newline and ``(loc + 1, [])`` at
        the end of ``instring``; raises ``ParseException`` otherwise.
        """
        end = len(instring)

        if loc == end:
            return loc + 1, []

        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"

        raise ParseException(instring, loc, self.errmsg, self)

4138 

4139 

class StringStart(PositionToken):
    """Matches if the current position is at the beginning of the parse
    string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at offset 0, or at the offset reached by
        pre-parsing (skipping whitespace and ignorables) from offset 0."""
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

4155 

4156 

class StringEnd(PositionToken):
    """
    Matches if the current position is at the end of the parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or past the end.

        Exactly at the end the returned location is ``loc + 1``; beyond the
        end it is ``loc`` unchanged. Raises ``ParseException`` when input
        remains.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        # step past the end once, so a later call sees loc > end
        new_loc = loc + 1 if loc == end else loc
        return new_loc, []

4175 

4176 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # pre-PEP8 spelling; it wins only when it differs from the default
        legacy_chars: str = deprecate_argument(kwargs, "wordChars", printables)
        chosen = word_chars if legacy_chars == printables else legacy_chars

        super().__init__()
        self.wordChars = set(chosen)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at offset 0, or where the previous
        character is not a word character and the current one is."""
        word_chars = self.wordChars
        if loc != 0 and (
            instring[loc - 1] in word_chars or instring[loc] not in word_chars
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4203 

4204 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # pre-PEP8 spelling; it wins only when it differs from the default
        legacy_chars: str = deprecate_argument(kwargs, "wordChars", printables)
        chosen = word_chars if legacy_chars == printables else legacy_chars

        super().__init__()
        self.wordChars = set(chosen)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or past the end of the string, or where
        the previous character is a word character and the current one is
        not."""
        word_chars = self.wordChars
        length = len(instring)
        if (
            length > 0
            and loc < length
            and (instring[loc] in word_chars or instring[loc - 1] not in word_chars)
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4232 

4233 

class Tag(Token):
    """
    A meta-element that inserts a named result into the parsed tokens,
    so it can be checked later in a parse action or while processing
    the parsed results. Accepts an optional tag value, defaulting to
    `True`.

    Example:

    .. doctest::

       >>> end_punc = "." | ("!" + Tag("enthusiastic"))
       >>> greeting = "Hello," + Word(alphas) + end_punc

       >>> result = greeting.parse_string("Hello, World.")
       >>> print(result.dump())
       ['Hello,', 'World', '.']

       >>> result = greeting.parse_string("Hello, World!")
       >>> print(result.dump())
       ['Hello,', 'World', '!']
       - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self._may_return_empty = True
        self.mayIndexError = False
        self.show_in_diagram = False
        self.leave_whitespace()
        # the tag is written into the results by a parse action
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        """Return ``<ClassName>:<tag_name>=<repr of tag_value>``."""
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

4275 

4276 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any strings in the sequence with the literal class
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in list(exprs)
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """
        Add an expression to the list of expressions related to this ParseExpression instance.
        """
        self.exprs.append(other)
        # drop the cached default name
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        # apply the setting to copies of the contained expressions
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        # apply the setting to copies of the contained expressions
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        # a Suppress that is already registered is not added again
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        """Return ``<ClassName>:(<contained expressions>)``."""
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline contained expressions and flatten one level of
        same-class nesting; returns ``self``."""
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def can_absorb(candidate) -> bool:
            # only plain nested expressions of our own class are merged:
            # no parse actions, no results name, no debug flag
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        def inherit_flags(absorbed) -> None:
            self._defaultName = None
            self._may_return_empty |= absorbed.mayReturnEmpty
            self.mayIndexError |= absorbed.mayIndexError

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if can_absorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                inherit_flags(head)

            tail = self.exprs[-1]
            if can_absorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                inherit_flags(tail)

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``PyparsingDeprecationWarning``, validates the
        contained expressions, then calls ``_checkRecursion``."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        trace = ([] if validateTrace is None else validateTrace)[:] + [self]
        for e in self.exprs:
            e.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression, with each contained expression
        copied as well.

        Generally only used internally by pyparsing.
        """
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the diagnostic is
        enabled and not suppressed) if a contained expression already has
        a results name."""
        diagnostic_active = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        )

        if diagnostic_active:
            for e in self.exprs:
                if not (isinstance(e, ParserElement) and e.resultsName):
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in e.suppress_warnings_
                ):
                    continue

                # report only the first colliding expression
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4461 

4462 

4463class And(ParseExpression): 

4464 """ 

4465 Requires all given :class:`ParserElement` s to be found in the given order. 

4466 Expressions may be separated by whitespace. 

4467 May be constructed using the ``'+'`` operator. 

4468 May also be constructed using the ``'-'`` operator, which will 

4469 suppress backtracking. 

4470 

4471 Example: 

4472 

4473 .. testcode:: 

4474 

4475 integer = Word(nums) 

4476 name_expr = Word(alphas)[1, ...] 

4477 

4478 expr = And([integer("id"), name_expr("name"), integer("age")]) 

4479 # more easily written as: 

4480 expr = integer("id") + name_expr("name") + integer("age") 

4481 """ 

4482 

class _ErrorStop(Empty):
    """Marker element that ``And.parseImpl`` checks for by exact type; once
    seen, later failures are raised as ``ParseSyntaxException``."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.leave_whitespace()

    def _generateDefaultName(self) -> str:
        """Always named ``"-"``."""
        return "-"

4490 

def __init__(
    self,
    exprs_arg: typing.Iterable[Union[ParserElement, str]],
    savelist: bool = True,
) -> None:
    """Build an ``And`` from ``exprs_arg``.

    Strings are wrapped with the literal class and each ``...`` entry is
    replaced by a ``SkipTo`` targeting the expression that follows it.
    Whitespace handling is taken from the first expression.

    Raises ``Exception`` if the sequence ends in ``...``.
    """
    # instantiate exprs as a list, converting strs to ParserElements
    exprs: list[ParserElement] = [
        self._literalStringClass(item) if isinstance(item, str) else item
        for item in exprs_arg
    ]

    # convert any Ellipsis elements to SkipTo
    if Ellipsis in exprs:

        # Ellipsis cannot be the last element
        if exprs[-1] is Ellipsis:
            raise Exception("cannot construct And with sequence ending in ...")

        # pair each element with its successor; the last element is never
        # an Ellipsis and is left in place by the slice assignment
        exprs[:-1] = [
            SkipTo(following)("_skipped*") if current is Ellipsis else current
            for current, following in zip(exprs, exprs[1:])
        ]

    super().__init__(exprs, savelist)

    if not self.exprs:
        self._may_return_empty = True
    else:
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        first = self.exprs[0]
        if isinstance(first, White):
            self.skipWhitespace = False
        else:
            self.set_whitespace_chars(
                first.whiteChars,
                copy_defaults=first.copyDefaultWhiteChars,
            )
            self.skipWhitespace = first.skipWhitespace

    self.callPreparse = True

4531 

def streamline(self) -> ParserElement:
    """
    Collapse `And` expressions like `And(And(And(A, B), C), D)`
    to `And(A, B, C, D)`.

    .. doctest::

        >>> expr = Word("A") + Word("B") + Word("C") + Word("D")
        >>> # Using '+' operator creates nested And expression
        >>> expr
        {{{W:(A) W:(B)} W:(C)} W:(D)}
        >>> # streamline simplifies to a single And with multiple expressions
        >>> expr.streamline()
        {W:(A) W:(B) W:(C) W:(D)}

    Guards against collapsing out expressions that have special features,
    such as results names or parse actions.

    Resolves pending Skip commands defined using `...` terms.
    """

    def ends_with_pending_skip(candidate) -> bool:
        return bool(
            isinstance(candidate, ParseExpression)
            and candidate.exprs
            and isinstance(candidate.exprs[-1], _PendingSkip)
        )

    # collapse any _PendingSkip's
    if self.exprs and any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
        deleted_expr_marker = NoMatch()
        # iterate a snapshot, while the live list is updated by index
        for i, e in enumerate(self.exprs[:-1]):
            if e is deleted_expr_marker:
                continue
            if ends_with_pending_skip(e):
                # fold the following expression into the pending skip
                e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                self.exprs[i + 1] = deleted_expr_marker
        self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

    super().streamline()

    # link any IndentedBlocks to the prior expression
    prev: ParserElement
    cur: ParserElement
    for prev, cur in zip(self.exprs, self.exprs[1:]):
        # traverse cur or any first embedded expr of cur looking for an IndentedBlock
        # (but watch out for recursive grammar)
        visited = set()
        while id(cur) not in visited:
            visited.add(id(cur))
            if isinstance(cur, IndentedBlock):
                # when prev matches, record its column on the block
                prev.add_parse_action(
                    lambda s, l, t, cur_=cur: setattr(
                        cur_, "parent_anchor", col(l, s)
                    )
                )
                break
            next_first = next(iter(cur.recurse()), None)
            if next_first is None:
                break
            cur = typing.cast(ParserElement, next_first)

    self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
    return self

4600 

def parseImpl(self, instring, loc, do_actions=True):
    """
    Match every contained expression in sequence, starting at ``loc``.

    Returns a ``(loc, tokens)`` tuple with the location after the last
    expression and the accumulated tokens of all the expressions.

    Once an ``And._ErrorStop`` marker has been passed, a failure in any
    later expression is raised as a ``ParseSyntaxException``.
    """
    # pass False as callPreParse arg to _parse for first element, since we already
    # pre-parsed the string as part of our And pre-parsing
    head, *rest = self.exprs
    loc, tokens = head._parse(instring, loc, do_actions, callPreParse=False)

    past_error_stop = False
    for term in rest:
        # exact type test, not isinstance
        if type(term) is And._ErrorStop:
            past_error_stop = True
            continue

        if not past_error_stop:
            loc, term_tokens = term._parse(instring, loc, do_actions)
        else:
            try:
                loc, term_tokens = term._parse(instring, loc, do_actions)
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException._from_exception(pe)
            except IndexError:
                raise ParseSyntaxException(
                    instring, len(instring), self.errmsg, self
                )

        tokens += term_tokens

    return loc, tokens

4629 

def __iadd__(self, other):
    """
    Implement ``self += other`` by appending ``other`` to this expression.

    A string operand is first converted using ``self._literalStringClass``.
    Returns ``NotImplemented`` when the operand is not a ``ParserElement``.
    """
    addend = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(addend, ParserElement):
        return self.append(addend)  # And([self, other])
    return NotImplemented

4636 

4637 def _checkRecursion(self, parseElementList): 

4638 subRecCheckList = parseElementList[:] + [self] 

4639 for e in self.exprs: 

4640 e._checkRecursion(subRecCheckList) 

4641 if not e.mayReturnEmpty: 

4642 break 

4643 

4644 def _generateDefaultName(self) -> str: 

4645 inner = " ".join(str(e) for e in self.exprs) 

4646 # strip off redundant inner {}'s 

4647 while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": 

4648 inner = inner[1:-1] 

4649 return f"{{{inner}}}" 

4650 

4651 

class Or(ParseExpression):
    """Matches when at least one of the given :class:`ParserElement` s
    matches. If more than one alternative matches, the one that matches
    the longest string is used. May be constructed using the ``'^'``
    operator.

    Example:

    .. testcode::

       # construct Or using '^' operator

       number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
       print(number.search_string("123 3.1416 789"))

    prints:

    .. testoutput::

       [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        # any alternative that can match empty lets the whole Or match empty
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline via the parent class, then recompute the flags that
        are derived from the contained alternatives. Returns ``self``."""
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Try every alternative at ``loc`` and return the result of the one
        matching the most input.

        Raises the fatal exception with the greatest location if any
        alternative raised a ``ParseFatalException`` and no match was
        returned; otherwise raises the ``ParseException`` that got furthest
        into the input.
        """
        furthest_loc = -1
        furthest_err = None
        candidates: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []

        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: find where each alternative would end
        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                fatals.append(pfe)
                # a fatal error discards any ordinary failure recorded so far
                furthest_err = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_err = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_err = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, do_actions)

            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for expected_end, alt in candidates:
                if expected_end <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_end, toks = alt._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_err = err
                        furthest_loc = err.loc
                else:
                    if actual_end >= expected_end:
                        return actual_end, toks
                    # didn't match as much as before
                    if actual_end > longest[0]:
                        longest = actual_end, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda exc: -exc.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer parser element description
                    fatals.sort(
                        key=lambda exc: (-exc.loc, -len(str(exc.parser_element)))
                    )
            raise fatals[0]

        if furthest_err is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            if furthest_loc == self.preParse(instring, loc):
                furthest_err.msg = self.errmsg or ""
            raise furthest_err

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Implement ``self ^= other`` by appending ``other`` as another
        alternative; strings are converted with ``_literalStringClass``."""
        alt = self._literalStringClass(other) if isinstance(other, str_type) else other
        if isinstance(alt, ParserElement):
            return self.append(alt)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " ^ ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first emitting the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic when it is
        enabled and an un-suppressed ``And`` is among the alternatives."""
        diag_flag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag_flag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag_flag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4812 

4813 

class MatchFirst(ParseExpression):
    """Matches when at least one of the given :class:`ParserElement` s
    matches. If more than one alternative matches, the first one listed
    is the one that is used. May be constructed using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

        # watch the order of expressions to match
        >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        >>> print(number.search_string("123 3.1416 789")) # Fail!
        [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        >>> print(number.search_string("123 3.1416 789")) # Better
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once via the parent class, then recompute the flags
        derived from the contained alternatives. Returns ``self``."""
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` from an alternative is re-raised at once.
        If every alternative fails, the ``ParseException`` that got furthest
        into the input is raised.
        """
        furthest_loc = -1
        furthest_err = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_err = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_err = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_err is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == self.preParse(instring, loc):
            furthest_err.msg = self.errmsg or ""
        raise furthest_err

    def __ior__(self, other):
        """Implement ``self |= other`` by appending ``other`` as another
        alternative; strings are converted with ``_literalStringClass``."""
        alt = self._literalStringClass(other) if isinstance(other, str_type) else other
        if isinstance(alt, ParserElement):
            return self.append(alt)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " | ".join(map(str, self.exprs)) + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first emitting the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic when it is
        enabled and an un-suppressed ``And`` is among the alternatives."""
        diag_flag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag_flag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diag_flag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4924 

4925 

class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but
    accepts them in any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example:

    .. testcode::

       color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
       shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
       integer = Word(nums)
       shape_attr = "shape:" + shape_type("shape")
       posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
       color_attr = "color:" + color("color")
       size_attr = "size:" + integer("size")

       # use Each (using operator '&') to accept attributes in any order
       # (shape and posn are required, color and size are optional)
       shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

       shape_spec.run_tests('''
           shape: SQUARE color: BLACK posn: 100, 120
           shape: CIRCLE size: 50 color: BLUE posn: 50,80
           color:GREEN size:20 shape:TRIANGLE posn:20,40
           '''
           )

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       shape: SQUARE color: BLACK posn: 100, 120
       ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
       - color: 'BLACK'
       - posn: ['100', ',', '120']
         - x: '100'
         - y: '120'
       - shape: 'SQUARE'
       ...

       shape: CIRCLE size: 50 color: BLUE posn: 50,80
       ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
        'posn:', ['50', ',', '80']]
       - color: 'BLUE'
       - posn: ['50', ',', '80']
         - x: '50'
         - y: '80'
       - shape: 'CIRCLE'
       - size: '50'
       ...

       color:GREEN size:20 shape:TRIANGLE posn:20,40
       ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
        'posn:', ['20', ',', '40']]
       - color: 'GREEN'
       - posn: ['20', ',', '40']
         - x: '20'
         - y: '40'
       - shape: 'TRIANGLE'
       - size: '20'
       ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        self._may_return_empty = (
            all(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
        )
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built on its first call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``self &= other`` by appending ``other``; strings are
        converted with ``_literalStringClass``."""
        term = self._literalStringClass(other) if isinstance(other, str_type) else other
        if isinstance(term, ParserElement):
            return self.append(term)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline via the parent class and recompute whether this
        expression may match empty. Returns ``self``."""
        super().streamline()
        self._may_return_empty = (
            all(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the contained expressions in whatever order they occur.

        A first phase uses ``try_parse`` to discover the order in which the
        expressions match; a second phase parses them in that order from
        ``loc`` to build the results.

        Raises the fatal exception with the greatest location if any was
        recorded in the final matching round, or a ``ParseException`` naming
        the required expressions that were never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            unwrapped_opts = [e.expr for e in self.exprs if isinstance(e, Opt)]
            implicit_opts = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = unwrapped_opts + implicit_opts
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        match_sequence: list[ParserElement] = []

        misses: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        # keep sweeping over the remaining expressions until a full sweep
        # matches none of them
        while True:
            candidates = pending_required + pending_optional + repeatables
            misses.clear()
            fatals.clear()
            for cand in candidates:
                try:
                    scan_loc = cand.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = cand
                    fatals.append(pfe)
                    misses.append(cand)
                except ParseException:
                    misses.append(cand)
                else:
                    # record the original Opt wrapper when an unwrapped Opt matched
                    match_sequence.append(self.opt1map.get(id(cand), cand))
                    if cand in pending_required:
                        pending_required.remove(cand)
                    elif cand in pending_optional:
                        pending_optional.remove(cand)
            if len(misses) == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda exc: -exc.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(
                        key=lambda exc: (-exc.loc, -len(str(exc.parser_element)))
                    )
            raise fatals[0]

        if pending_required:
            missing = ", ".join([str(e) for e in pending_required])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_sequence += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        # second phase: parse for real, in the discovered order
        total_results = ParseResults([])
        for matched in match_sequence:
            loc, results = matched._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return "{" + " & ".join(map(str, self.exprs)) + "}"

5109 

5110 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens. Wraps a single contained expression,
    stored as ``self.expr``.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a plain string to a parser element
            literal_text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(literal_text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(literal_text)
            else:
                expr = literal_cls(Literal(literal_text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # take over the parsing characteristics of the wrapped expression
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the contained expression in a list, or an empty list if
        there is none."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """
        Parse using the contained expression, without pre-parsing again.

        Raises ``ParseException`` if no expression is defined. Exceptions
        from the contained expression are re-raised after any empty
        ``pstr``, ``loc`` and ``parser_element`` values are filled in.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            # a custom-named element (other than a Forward) reports its own message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        a copy of the contained expression when ``recursive`` is true.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        a copy of the contained expression when ``recursive`` is true.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns. The ignore expression is also passed on to the
        contained expression.
        """
        # a Suppress that is already registered is not added a second time
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then its contained expression."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element is already in
        ``parseElementList``; otherwise continue the check in the contained
        expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        trail = parseElementList + [self]
        if self.expr is not None:
            self.expr._checkRecursion(trail)

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``PyparsingDeprecationWarning`` and then runs
        the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        extended_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5232 

5233 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).

    Example:

    .. testcode::

       '''
       BNF:
       statement ::= assignment_stmt | if_stmt
       assignment_stmt ::= identifier '=' rvalue
       rvalue ::= identifier | integer
       if_stmt ::= 'if' bool_condition block
       block ::= ([indent] statement)...
       identifier ::= [A..Za..z]
       integer ::= [0..9]...
       bool_condition ::= 'TRUE' | 'FALSE'
       '''

       IF, TRUE, FALSE = Keyword.using_each("IF TRUE FALSE".split())

       statement = Forward()
       identifier = Char(alphas)
       integer = Word(nums).add_parse_action(lambda t: int(t[0]))
       rvalue = identifier | integer
       assignment_stmt = identifier + "=" + rvalue

       if_stmt = IF + (TRUE | FALSE) + IndentedBlock(statement)

       statement <<= Group(assignment_stmt | if_stmt)

       result = if_stmt.parse_string('''
           IF TRUE
               a = 1000
               b = 2000
               IF FALSE
                   z = 100
           ''')
       print(result.dump())

    .. testoutput::

       ['IF', 'TRUE', [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]]
       [0]:
         IF
       [1]:
         TRUE
       [2]:
         [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]
         [0]:
           ['a', '=', 1000]
         [1]:
           ['b', '=', 2000]
         [2]:
           ['IF', 'FALSE', [['z', '=', 100]]]
           [0]:
             IF
           [1]:
             FALSE
           [2]:
             [['z', '=', 100]]
             [0]:
               ['z', '=', 100]
    """

    class _Indent(Empty):
        # matches (consuming nothing) only when located exactly at column ref_col
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # matches (consuming nothing) only when located to the right of column ref_col
        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        # if recursive:
        #     raise NotImplementedError("IndentedBlock with recursive is not implemented")
        self._recursive = recursive
        self._grouped = grouped
        # column used by parseImpl to recognize the undent that ends the block
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match one or more occurrences of the contained expression, each
        starting at the column of the first non-whitespace character found
        at or after ``loc``.

        Builds a one-off expression for that column and returns the result
        of its ``parseImpl``. An exception raised by the initial
        ``try_parse`` of the contained expression propagates unchanged.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        block_col = col(anchor_loc, instring)
        at_block_col = self._Indent(block_col)

        line_expr = Empty() + at_block_col + self.expr
        if self._recursive:
            # allow a deeper-indented nested block after each line
            deeper = self._IndentGreater(block_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = block_col
            line_expr += Opt(deeper + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{block_col}")
        block = OneOrMore(line_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )

5357 

5358 

class AtStringStart(ParseElementEnhance):
    """Matches the given expression only when it is found at the very
    beginning of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string(" 123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the contained expression when ``loc`` is 0; raise
        ``ParseException`` for any other location."""
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

5378 

5379 

class AtLineStart(ParseElementEnhance):
    r"""Matches the given expression only when it is found at the beginning
    of a line within the parse string

    Example:

    .. testcode::

       test = '''\
       BBB this line
       BBB and this line
         BBB but not this one
       A BBB and definitely not this one
       '''

       for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
           print(t)

    prints:

    .. testoutput::

       ['BBB', ' this line']
       ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the contained expression when ``loc`` is in column 1;
        raise ``ParseException`` otherwise."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

5414 

5415 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position. ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example:

    .. testcode::

       # use FollowedBy to match a label only if it is followed by a ':'
       data_word = Word(alphas)
       label = data_word + FollowedBy(':')
       attr_expr = Group(
           label + Suppress(':')
           + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
       )

       attr_expr[1, ...].parse_string(
           "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

       [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression at ``loc``, then return the
        unchanged ``loc`` together with the results emptied of tokens."""
        # parse with the contained expression, then delete the list contents of the
        # returned ParseResults - this keeps any named results that were defined
        # in the FollowedBy expression
        _end, lookahead = self.expr._parse(instring, loc, do_actions=do_actions)
        del lookahead[:]

        return loc, lookahead

5458 

5459 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position. ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

       # VB-style variable names with type prefixes
       int_var = PrecededBy("#") + pyparsing_common.identifier
       str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False
        self.exact = False

        # for these kinds of expression the lookbehind distance is taken
        # from the expression itself, overriding the retreat argument
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat, self.exact = len(expr), True
        elif isinstance(expr, (Literal, Keyword)):
            retreat, self.exact = expr.matchLen, True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat, self.exact = expr.maxLen, True
        elif isinstance(expr, PositionToken):
            retreat, self.exact = 0, True

        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the token list of the results
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """
        Verify that the contained expression matches just before ``loc``.

        Returns the unchanged ``loc`` with the lookbehind results. Raises
        ``ParseException`` when there is not enough preceding input in exact
        mode, or the last failure seen when no offset in the lookbehind
        window matches.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        anchored = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_failure: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )

        # try starting 1, 2, ... characters back from the end of the window
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                last_failure = pbe
            else:
                return loc, ret

        raise last_failure

5540 

5541 

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression and bracket its tokens with locations.

        Returns the end location and a ``ParseResults`` of the form
        ``[start, tokens, end]`` carrying the names ``locn_start``,
        ``value`` and ``locn_end``.
        """
        begin = loc
        end, matched = self.expr._parse(instring, begin, do_actions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end

        # must return as a list when named, so that the name will be
        # attached to the complete group
        return end, ([located] if self.resultsName else located)

5585 

5586 

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position. Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list. May be constructed using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        """Wrap ``expr`` as the expression that must not match."""
        super().__init__(expr)
        # Turn off whitespace skipping on this element only: calling
        # self.leave_whitespace() is deliberately avoided because it would
        # propagate the setting to the contained expressions.
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = "Found unwanted token, {}".format(self.expr)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with an empty token list unless the wrapped expression matches.

        Raises :class:`ParseException` when the wrapped expression can be
        parsed at ``loc``.
        """
        unwanted_found = self.expr.can_parse_next(instring, loc, do_actions=do_actions)
        if not unwanted_found:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        """Return the default name: the wrapped expression in ``~{...}``."""
        return "~{{{}}}".format(self.expr)

5630 

5631 

class _MultipleMatch(ParseElementEnhance):
    """Repetition of a wrapped expression, with an optional stop sentinel.

    ``parseImpl`` requires at least one match of the wrapped expression and
    then keeps matching until the expression (or the sentinel check) fails.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        """Store the repeated expression and register the stop sentinel.

        :param expr: expression to be repeated
        :param stop_on: optional sentinel expression (or string)
        :param kwargs: may carry the older ``stopOn`` spelling, which is
            fetched through ``deprecate_argument``
        """
        stopOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr)
        self.saveAsList = True

        # the older keyword wins when it was supplied
        ender = stopOn or stop_on
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.stopOn(ender)

    def stop_on(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that ends the repetition.

        Strings are converted with ``_literalStringClass``; the negated
        sentinel is kept in ``self.not_ender``. Returns ``self``.
        """
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    stopOn = stop_on

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the wrapped expression once, then as many more times as possible.

        A failure of the first match (or of the sentinel check before it)
        propagates; failures after that simply end the repetition.
        """
        parse_item = self.expr._parse
        skip_ignorables = self._skipIgnorables
        if self.not_ender is not None:
            reject_ender = self.not_ender.try_parse
        else:
            reject_ender = None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_item(instring, loc, do_actions)

        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_item(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failure is the normal way out of the repetition loop
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Attach a results name, warning first about named contained expressions.

        The warning is only considered when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and not suppressed on this element.
        """
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for inner in [self.expr] + self.expr.recurse():
                if not isinstance(inner, ParserElement):
                    continue
                if not inner.resultsName:
                    continue
                if diag in inner.suppress_warnings_:
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {inner.resultsName!r} on contained expression"
                )
                warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, list_all_matches)

5712 

5713 

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word).set_parse_action(' '.join))

        >>> text = "shape: SQUARE posn: upper left color: BLACK"

        # Fail! read 'posn' as data instead of next label
        >>> attr_expr[1, ...].parse_string(text).pprint()
        [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore
        # to avoid reading label string as part of the data
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(
        ...         data_word, stop_on=label).set_parse_action(' '.join))
        >>> OneOrMore(attr_expr).parse_string(text).pprint() # Better
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        >>> (attr_expr * (1,)).parse_string(text).pprint()
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        """Return the default name: the wrapped expression as ``{...}...``."""
        return "{{{}}}...".format(self.expr)

5757 

5758 

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        """Set up the repetition; the older ``stopOn`` keyword is still honored."""
        stopOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr, stop_on=stopOn or stop_on)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Run the one-or-more match; on failure return an empty result at ``loc``."""
        try:
            outcome = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # zero matches is acceptable: empty results, location unchanged
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        """Return the default name: the wrapped expression as ``[...]...``."""
        return "[{}]...".format(self.expr)

5792 

5793 

class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Parameters:

    - ``expr`` - expression (or string) for a single list element
    - ``delim`` - (default= ``","``) - delimiter expression or string
    - ``combine`` - (default= ``False``) - wrap the whole list in
      :class:`Combine`
    - ``min`` - (default= ``None``, treated as 1) - minimum number of
      elements; must be at least 1
    - ``max`` - (default= ``None``) - maximum number of elements; must not
      be less than the effective minimum
    - ``allow_trailing_delim`` - (default= ``False``) - keyword-only

    Raises ``ValueError`` for an invalid ``min`` / ``max`` combination.

    Example:

    .. doctest::

        >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
        ParseResults(['aa', 'bb', 'cc'], {})
        >>> DelimitedList(Word(hexnums), delim=':', combine=True
        ...     ).parse_string("AA:BB:CC:DD:EE")
        ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        # ``min`` defaults to 1, so ``max`` is validated against the effective
        # minimum; otherwise ``max < 1`` with ``min=None`` would slip through
        # and yield a negative repetition count below.
        min_items = min or 1
        if max is not None and max < min_items:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        # text form of the delimiter, used for the default name
        self.raw_delim = str(delim)
        self.delim = delim
        self.combine = combine
        if not combine:
            # delimiters are dropped from the results unless combining
            self.delim = Suppress(delim) if not isinstance(delim, Suppress) else delim
        self.min = min_items
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one element, followed by (min-1 .. max-1) repetitions of delim + element
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Return the default name ``<content> [<delim> <content>]...``."""
        content_expr = self.content.streamline()
        return f"{content_expr} [{self.raw_delim} {content_expr}]..."

5865 

5866 

5867class _NullToken: 

5868 def __bool__(self): 

5869 return False 

5870 

5871 def __str__(self): 

5872 return "" 

5873 

5874 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    :param expr: expression that must match zero or one time
    :param default: (optional) - value to be returned
        if the optional expression is not found.

    Example:

    .. testcode::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
        98765-
             ^
        ParseException: Expected end of text, found '-' (at char 5), (line:1, col:6)
        FAIL: Expected end of text, found '-' (at char 5), (line:1, col:6)
    """

    # sentinel used as the ``default`` argument when no default was supplied
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        """Wrap ``expr`` and remember the value to emit when it is absent."""
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, falling back to the default value.

        On a failed match the location is left unchanged and the result is
        either an empty list (no default supplied) or the default value,
        stored under the wrapped expression's results name when it has one.
        """
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                # nothing matched and no default was given
                return loc, []
            if not wrapped.resultsName:
                return loc, [fallback]
            tokens = ParseResults([fallback])
            tokens[wrapped.resultsName] = fallback
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Return the default name: the wrapped expression inside ``[...]``."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return "[" + inner + "]"

5956 

5957 

# ``Optional`` is a second name bound to the very same class object as ``Opt``.
Optional = Opt

5959 

5960 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param other: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
        (the skipped text and target expression are returned
        as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
        (typically quoted strings and comments)
        that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
        are not allowed to be included in the skipped text;
        if found before the target expression is found,
        the :class:`SkipTo` is not a match

    Example:

    .. testcode::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

            # | Severity | Description | Days Open
            -----+----------+-------------------------------------------+-----------
            101 | Critical | Intermittent system crash | 6
            94 | Cosmetic | Spelling error on Login ('log|n') | 14
            79 | Minor | System slow when running too many reports | 47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints:

    .. testoutput::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # the older ``failOn`` spelling is fetched from kwargs and wins if given
        failOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "failOn", None
        )

        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper element whose only job is to carry the ignore expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the ignore list held by ``self.ignorer``."""
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.

        Returns ``self`` so that calls can be chained.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the location reached and a ``ParseResults`` holding the
        skipped text (plus the target's tokens when ``include`` was set).
        Raises :class:`ParseException` when the end of ``instring`` is
        passed without the target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break leaves the loop without raising, so
                # when ``include`` is False the text up to the fail_on match is
                # still returned - confirm this is the intended contract
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: parse actions are not run during the scan
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honoring do_actions
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

6125 

6126 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    instance using the ``'<<'`` operator.

    .. Note::

       Take care when assigning to ``Forward`` not to overlook
       precedence of operators.

       Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

           fwd_expr << a | b | c

       will actually be evaluated as::

           (fwd_expr << a) | b | c

       thereby leaving b and c out as parseable alternatives.
       It is recommended that you explicitly group the values
       inserted into the :class:`Forward`::

           fwd_expr << (a | b | c)

       Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :meth:`ParseResults.pprint` for an example of a recursive
    parser created using :class:`Forward`.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        """Create the placeholder, optionally with an initial expression."""
        # remember the frame that created this Forward; __del__ uses it to
        # point its warning at that location
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' call, compared in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and return ``self``.

        Strings are converted with ``_literalStringClass``; any other
        non-``ParserElement`` operand yields ``NotImplemented``.
        """
        # an expression is being attached, so the creation frame is no longer needed
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        self.expr = other
        # mirror the attached expression's settings on this wrapper
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # record the stack entry of whoever called this method (see __or__)
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """Implement ``<<=`` by delegating to ``<<`` for ``ParserElement`` operands."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """Build the ``|`` alternative, warning about a likely ``<<`` / ``|`` mix-up."""
        caller_line = traceback.extract_stack(limit=2)[-2]
        # warn when this '|' comes from the same stack entry that was recorded
        # by the most recent '<<'
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                PyparsingDiagnosticWarning,
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        """Warn, if enabled, when a Forward is dropped without an expression."""
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # report against the location recorded in __init__
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse through the contained expression.

        Without left-recursion support this simply defers to the parent
        implementation; with it enabled, the bounded recursion algorithm
        described in the comments below is applied.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                # none of the entry points found in the inspected frames
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                PyparsingDiagnosticWarning,
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                # a memoized failure is replayed by raising it again
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                # seed the memo with a failure located one position before ``loc``
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            # memoize the failure for both keys before re-raising
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class.

        Only this element's ``skipWhitespace`` flag is cleared here; the
        ``recursive`` argument is accepted but not used by this override.
        """
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class.

        Only this element's ``skipWhitespace`` flag is set here; the
        ``recursive`` argument is accepted but not used by this override.
        """
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the contained expression once and return ``self``."""
        if not self.streamlined:
            # set the flag before descending, so a recursive reference back to
            # this element finds it already marked
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a deprecation warning, then validates the contained expression."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        # descend only if this element is not already part of the trace
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<ClassName>: <expr>"`` with the expression text capped at 1000 characters."""
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        if self.expr is not None:
            return super().copy()
        else:
            # no expression attached yet: return a new Forward that wraps this one
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Attach a results name, warning (if enabled) when no expression is attached yet."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

6410 

6411 

class TokenConverter(ParseElementEnhance):
    """
    Abstract base for :class:`ParseElementEnhance` subclasses that convert
    the parsed results of their contained expression.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but is not passed on to the parent initializer.
        super().__init__(expr)
        self.saveAsList = False

6420 

6421 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example:

    .. doctest::

       >>> real = Word(nums) + '.' + Word(nums)
       >>> print(real.parse_string('3.1416'))
       ['3', '.', '1416']

       >>> # will also erroneously match the following
       >>> print(real.parse_string('3. 1416'))
       ['3', '.', '1416']

       >>> real = Combine(Word(nums) + '.' + Word(nums))
       >>> print(real.parse_string('3.1416'))
       ['3.1416']

       >>> # no match when there are internal spaces
       >>> print(real.parse_string('3. 1416'))
       Traceback (most recent call last):
       ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # The camelCase keyword, when given, takes precedence over join_string.
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        if not self.adjacent:
            super().ignore(other)
            return self

        # Adjacent mode calls the ParserElement implementation directly
        # instead of the inherited override.
        ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one joined string token."""
        # Start from a copy of the incoming results and empty its token list.
        combined = tokenlist.copy()
        del combined[:]

        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        wrap_in_list = self.resultsName and combined.haskeys()
        return [combined] if wrap_in_list else combined

6491 

6492 

class Group(TokenConverter):
    """Converter that returns the matched tokens as a nested list - handy
    for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    When the optional ``aslist`` argument is True, the parsed tokens are
    returned as a Python list instead of a pyparsing ParseResults.

    Example:

    .. doctest::

       >>> ident = Word(alphas)
       >>> num = Word(nums)
       >>> term = ident | num
       >>> func = ident + Opt(DelimitedList(term))
       >>> print(func.parse_string("fn a, b, 100"))
       ['fn', 'a', 'b', '100']

       >>> func = ident + Group(Opt(DelimitedList(term)))
       >>> print(func.parse_string("fn a, b, 100"))
       ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in a list (or a ``ParseResults.List``)."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_tokens = tokenlist.as_list()
        else:
            plain_tokens = list(tokenlist)
        return ParseResults.List(plain_tokens)

6530 

6531 

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and also
    as a dictionary. Each element can be looked up using the first token
    of that element as its key. Useful for tabular report scraping when
    the first column can serve as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict instead of a pyparsing ParseResults.

    Example:

    .. doctest::

       >>> data_word = Word(alphas)
       >>> label = data_word + FollowedBy(':')

       >>> attr_expr = (
       ...     label + Suppress(':')
       ...     + OneOrMore(data_word, stop_on=label)
       ...     .set_parse_action(' '.join)
       ... )

       >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

       >>> # print attributes as plain groups
       >>> print(attr_expr[1, ...].parse_string(text).dump())
       ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

       # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
       # Dict will auto-assign names.
       >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
       >>> print(result.dump())
       [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
       - color: 'light blue'
       - posn: 'upper left'
       - shape: 'SQUARE'
       - texture: 'burlap'
       [0]:
         ['shape', 'SQUARE']
       [1]:
         ['posn', 'upper left']
       [2]:
         ['color', 'light blue']
       [3]:
         ['texture', 'burlap']

       # access named fields as dict entries, or output as dict
       >>> print(result['shape'])
       SQUARE
       >>> print(result.as_dict())
       {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register each entry of ``tokenlist`` under a key taken from its first token.

        Raises ``TypeError`` when an entry that needs copying cannot be copied.
        """
        for position, entry in enumerate(tokenlist):
            entry_size = len(entry)
            if entry_size == 0:
                continue

            # The first token of the entry is the key; int keys are stored as text.
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if entry_size == 1:
                # key only: the stored value is an empty string
                value = ""
            elif entry_size == 2 and not isinstance(entry[1], ParseResults):
                # key plus one plain token: that token is the value
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                # Keep the remainder as a container unless it is one unnamed item.
                keep_container = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_container else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

6630 

6631 

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example:

    .. doctest::

       >>> source = "a, b, c,d"
       >>> wd = Word(alphas)
       >>> wd_list1 = wd + (',' + wd)[...]
       >>> print(wd_list1.parse_string(source))
       ['a', ',', 'b', ',', 'c', ',', 'd']

       # often, delimiters that are useful during parsing are just in the
       # way afterward - use Suppress to keep them out of the parsed output
       >>> wd_list2 = wd + (Suppress(',') + wd)[...]
       >>> print(wd_list2.parse_string(source))
       ['a', 'b', 'c', 'd']

       # Skipped text (using '...') can be suppressed as well
       >>> source = "lead in START relevant text END trailing text"
       >>> start_marker = Keyword("START")
       >>> end_marker = Keyword("END")
       >>> find_body = Suppress(...) + start_marker + ... + end_marker
       >>> print(find_body.parse_string(source))
       ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` stands for a skip whose target is supplied later
        # through ``+`` or ``-``.
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # Resolve the pending skip: suppress everything up to ``other``.
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # Same resolution as ``__add__``, combined with ``-``.
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # Always return an empty token list.
        return []

    def suppress(self) -> ParserElement:
        # Already a Suppress, so return it unchanged.
        return self

6684 

6685 

6686# XXX: Example needs to be re-done for updated output 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each time the parse action is called, the decorator writes
    ``">>entering <name>(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``. When the parse action finishes it writes
    ``"<<leaving <name>"`` followed by the returned value, or by the type and
    message of the exception the parse action raised (which is re-raised).

    Example:

    .. testsetup:: stderr

       import sys
       sys.stderr = sys.stdout

    .. testcleanup:: stderr

       sys.stderr = sys.__stderr__

    .. testcode:: stderr

       wd = Word(alphas)

       @trace_parse_action
       def remove_duplicate_chars(tokens):
           return ''.join(sorted(set(''.join(tokens))))

       wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
       print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
       :options: +NORMALIZE_WHITESPACE

       >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
       0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
       <<leaving remove_duplicate_chars (ret: 'dfjkls')
       ['dfjkls']

    .. versionchanged:: 3.1.0
       Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # The last three positional arguments are (string, location, tokens).
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # An extra leading argument is present: prefix its type name.
            label = f"{type(paArgs[0]).__name__}.{label}"

        write = sys.stderr.write
        write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            write(f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n")
            raise
        else:
            write(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z

6750 

6751 

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Building blocks of the bracket-expression grammar parsed by srange().

# A backslash followed by one punctuation character; yields that character.
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# ``\x##``, ``\X##`` or ``\0x##``; yields the character with that hex code.
# Only the text after the x marker is converted. str.lstrip(r"\0x") is not
# used because it removes a *set* of characters: it would also eat leading
# zero digits ("\x00" -> "") and would leave an uppercase "X" in place.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# ``\0##`` octal escape; yields the character with that octal code.
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# Any one member of a bracket expression: an escape, or a single character
# other than a backslash or "]".
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# Two members separated by a dash, grouped so srange() can expand the range.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[" + optional "^" (results name "negate") + members (results name "body") + "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6778 

6779 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. If the
    input cannot be parsed or expanded, an empty string is returned.
    The values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def _expanded(p):
        # A plain item stands for itself; a ParseResults holds the two
        # endpoints of a range, expanded inclusively.
        if not isinstance(p, ParseResults):
            yield p
            return
        first, last = ord(p[0]), ord(p[1])
        yield from map(chr, range(first, last + 1))

    try:
        members = _reBracketExpr.parse_string(s).body
        return "".join(ch for member in members for ch in _expanded(member))
    except Exception:
        return ""

6819 

6820 

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. Any additional args are
    forwarded to the given function as extra arguments after the token,
    as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action takes its ``__name__`` from ``func``, or from
    the class of ``func`` when ``func`` has no ``__name__``.

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        return [func(token, *args) for token in t]

    # Use the function's own name, falling back to the name of its class.
    class_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", class_name)

    return pa

6865 

6866 

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable in the calling frame that is a
    :class:`ParserElement` without a custom name is named after the
    variable. Does nothing if the calling frame cannot be obtained.
    """

    # guard against _getframe not being implemented in the current Python
    frame_lookup = getattr(sys, "_getframe", lambda _: None)
    caller = frame_lookup(1)
    if caller is None:
        return

    # find all locals in the calling frame that are ParserElements
    caller = typing.cast(types.FrameType, caller)
    for local_name, candidate in caller.f_locals.items():
        # skip anything that is not a ParserElement or already has a custom name
        if not isinstance(candidate, ParserElement) or candidate.customName:
            continue
        candidate.set_name(local_name)

6885 

6886 

# Quoted-string expressions. Each one pairs a regex for the opening quote and
# the string body with a literal closing quote, wrapped in Combine.

# Double-quoted string. The body may contain: any character other than a
# double quote, newline, carriage return or backslash; a doubled quote ("");
# or a backslash escape (backslash + any non-x character, or \x + hex digits).
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Single-quoted counterpart of dbl_quoted_string ('' is the doubled quote).
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either form above, with the same regexes repeated inline and joined with ``|``.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Python-style quoted strings: triple-quoted (double or single) and one-line
# (double or single) forms, joined with ``^``. The one-line forms accept a
# backslash-escaped quote (\" or \') where the expressions above accept a
# doubled quote.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A literal "u" followed by a copy of quoted_string.
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")

6922 

6923 

# Character sets expanded by srange() from hex code-point ranges:
# alphas8bit covers 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff;
# punc8bit covers 0xa1-0xbf plus 0xd7 and 0xf7.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (Only ParserElement values bound in the module namespace at this point are
# collected, so this statement's position in the file matters.)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6932 

# Compatibility synonyms
# camelCase names for the snake_case objects defined in this module.
# The expression names are plain aliases bound to the same objects; the
# function names are built with replaced_by_pep8 (imported from .util).
# NOTE(review): presumably replaced_by_pep8 returns a wrapper that forwards to
# the snake_case function - confirm against .util.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on