Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2684 statements  

1# 

2# core.py 

3# 

4from __future__ import annotations 

5 

6import collections.abc 

7from collections import deque 

8import os 

9import typing 

10from typing import ( 

11 Any, 

12 Callable, 

13 Generator, 

14 NamedTuple, 

15 Sequence, 

16 TextIO, 

17 Union, 

18 cast, 

19) 

20from abc import ABC, abstractmethod 

21from enum import Enum 

22import string 

23import copy 

24import warnings 

25import re 

26import sys 

27from collections.abc import Iterable 

28import traceback 

29import types 

30from operator import itemgetter 

31from functools import wraps 

32from threading import RLock 

33from pathlib import Path 

34 

35from .util import ( 

36 _FifoCache, 

37 _UnboundedCache, 

38 __config_flags, 

39 _collapse_string_to_ranges, 

40 _escape_regex_range_chars, 

41 _flatten, 

42 LRUMemo as _LRUMemo, 

43 UnboundedMemo as _UnboundedMemo, 

44 replaced_by_pep8, 

45) 

46from .exceptions import * 

47from .actions import * 

48from .results import ParseResults, _ParseResultsWithOffset 

49from .unicode import pyparsing_unicode 

50 

51_MAX_INT = sys.maxsize 

52str_type: tuple[type, ...] = (str, bytes) 

53 

54# 

55# Copyright (c) 2003-2022 Paul T. McGuire 

56# 

57# Permission is hereby granted, free of charge, to any person obtaining 

58# a copy of this software and associated documentation files (the 

59# "Software"), to deal in the Software without restriction, including 

60# without limitation the rights to use, copy, modify, merge, publish, 

61# distribute, sublicense, and/or sell copies of the Software, and to 

62# permit persons to whom the Software is furnished to do so, subject to 

63# the following conditions: 

64# 

65# The above copyright notice and this permission notice shall be 

66# included in all copies or substantial portions of the Software. 

67# 

68# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

69# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

70# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

71# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

72# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

73# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

74# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

75# 

76 

77from functools import cached_property 

78 

79 

class __compat__(__config_flags):
    """
    Cross-version compatibility switches for pyparsing.

    The flags collected here gate features that are scheduled for release in
    a future version. Setting a flag to True turns that feature on in an
    earlier version, for compatibility development and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every class-body name defined so far that has no leading underscore
    _all_names = [_attr for _attr in locals() if not _attr.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]

101 

102 

class __diag__(__config_flags):
    """
    Holder for pyparsing's global diagnostic flags; every flag starts out
    ``False``. The flag names mirror the members of :class:`Diagnostics`.
    """

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # public flag names, then the subsets selected by their name prefix
    _all_names = [_attr for _attr in locals() if not _attr.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag whose name starts with ``warn``."""
        for warning_name in cls._warning_names:
            cls.enable(warning_name)

123 

124 

class Diagnostics(Enum):
    """
    Names of the global diagnostic flags (all default to disabled).

    - ``warn_multiple_tokens_in_named_alternation`` - warn when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with
      one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - warn when a results
      name is defined on a containing expression with ungrouped subexpressions
      that also have results names
    - ``warn_name_set_on_empty_Forward`` - warn when a :class:`Forward` is
      defined with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - warn when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - warn when a :class:`Forward` is
      defined but is overwritten by assigning using ``'='`` instead of
      ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - warn when :class:`one_of`
      is incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - not described in the
      original documentation; presumably warns when ``'<<'`` is combined with
      a ``'|'`` alternation while defining a :class:`Forward` (to be confirmed)
    - ``enable_debug_on_named_expressions`` - auto-enable debug on all
      subsequent calls to :class:`ParserElement.set_name`

    Individual diagnostics are switched with :func:`enable_diag` and
    :func:`disable_diag`; :func:`enable_all_warnings` turns on all warnings.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

157 

158 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member whose name selects the flag
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

164 

165 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member whose name selects the flag
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

171 

172 

def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing warning diagnostic (see :class:`Diagnostics`).

    Delegates to ``__diag__.enable_all_warnings()``.
    """
    return __diag__.enable_all_warnings()

178 

179 

# hide abstract class: __compat__ and __diag__ have been defined, so drop the
# imported base-class name from this module's namespace
del __config_flags

182 

183 

184def _should_enable_warnings( 

185 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] 

186) -> bool: 

187 enable = bool(warn_env_var) 

188 for warn_opt in cmd_line_warn_options: 

189 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( 

190 ":" 

191 )[:5] 

192 if not w_action.lower().startswith("i") and ( 

193 not (w_message or w_category or w_module) or w_module == "pyparsing" 

194 ): 

195 enable = True 

196 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): 

197 enable = False 

198 return enable 

199 

200 

# At import time, enable all pyparsing warning diagnostics if the interpreter's
# warning options (sys.warnoptions) or a non-empty PYPARSINGENABLEALLWARNINGS
# environment variable ask for them.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

205 

206 

# build list of single arg builtins, that can be used as parse actions
# (_trim_arity calls these with just the tokens argument)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# value returned by parseImpl: (location, tokens)
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition predicate may accept 0, 1, 2 or 3 arguments and returns a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: fn(s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug actions: the trailing bool is the cache_hit flag
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# commonly used character sets, as plain strings
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])

239 

240 

241class _ParseActionIndexError(Exception): 

242 """ 

243 Internal wrapper around IndexError so that IndexErrors raised inside 

244 parse actions aren't misinterpreted as IndexErrors raised inside 

245 ParserElement parseImpl methods. 

246 """ 

247 

248 def __init__(self, msg: str, exc: BaseException) -> None: 

249 self.msg: str = msg 

250 self.exc: BaseException = exc 

251 

252 

# Cached on first use by _trim_arity: the stack entry taken from
# traceback.extract_stack(limit=2)[-1] inside _trim_arity, and the
# (filename, lineno) pair synthesized from it for the line in wrapper that
# calls the user's function.
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is called with the full argument list and forwards
    to ``func`` only as many trailing arguments as ``func`` accepts. The
    wrapper first tries all arguments; each time the call itself raises
    TypeError it drops one more leading argument, up to ``max_limit`` dropped
    arguments, and once a call succeeds the discovered offset is reused for
    all later calls.

    A TypeError is treated as an arity mismatch only when the last of the
    first two traceback entries is the wrapper's own call line, which is
    recognized by comparing its (filename, lineno) with ``pa_call_line_synth``.
    Any other TypeError is re-raised unchanged.

    Functions found in ``_single_arg_builtins`` are wrapped so that they
    receive only the third (tokens) argument.

    An IndexError raised by ``func`` is re-raised wrapped in
    ``_ParseActionIndexError``.

    NOTE: no lines may be added or removed between the ``extract_stack`` line
    and the ``ret = func(...)`` line in ``wrapper`` without adjusting
    ``LINE_DIFF``.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    # True when the TypeError was raised by the call line above
                    # itself rather than from deeper inside func
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        # retry with one fewer leading argument
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    # callables without a __name__ fall back to their class name
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

319 

320 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a simple predicate function, one that returns ``True`` or ``False``,
    so that it can be used as a parse action. Useful in places where a parse
    action is required and :meth:`ParserElement.add_condition` cannot be used
    (such as when adding a condition to an operator level in
    :class:`infix_notation`).

    The returned parse action raises when the predicate's result is falsy
    and otherwise returns None.

    Optional keyword arguments:

    :param message: define a custom message to be used in the raised exception;
        defaults to ``"failed user-defined condition"``
    :param fatal: if ``True``, will raise :class:`ParseFatalException`
        to stop parsing immediately;
        otherwise will raise :class:`ParseException`

    """
    if message is None:
        failure_msg = "failed user-defined condition"
    else:
        failure_msg = message

    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException

    # normalize the predicate so it can always be called as predicate(s, l, t)
    predicate = _trim_arity(fn)

    @wraps(predicate)
    def pa(s, l, t):
        if bool(predicate(s, l, t)):
            return None
        raise failure_type(s, l, failure_msg)

    return pa

348 

349 

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default debug action run when a match attempt starts.

    Prints the expression and location (with line and column), the source
    line at that location, and a caret right-aligned to the column. The
    output is prefixed with ``*`` when ``cache_hit`` is true.
    """
    prefix = "*" if cache_hit else ""
    report = (
        f"{prefix}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
        f" {line(loc, instring)}\n"
        f" {'^':>{col(loc, instring)}}"
    )
    print(report)

361 

362 

363def _default_success_debug_action( 

364 instring: str, 

365 startloc: int, 

366 endloc: int, 

367 expr: ParserElement, 

368 toks: ParseResults, 

369 cache_hit: bool = False, 

370): 

371 cache_hit_str = "*" if cache_hit else "" 

372 print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}") 

373 

374 

375def _default_exception_debug_action( 

376 instring: str, 

377 loc: int, 

378 expr: ParserElement, 

379 exc: Exception, 

380 cache_hit: bool = False, 

381): 

382 cache_hit_str = "*" if cache_hit else "" 

383 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}") 

384 

385 

def null_debug_action(*args):
    """No-op debug action: accepts any arguments and does nothing, to suppress debugging output during parsing."""
    return None

388 

389 

390class ParserElement(ABC): 

391 """Abstract base level parser element class.""" 

392 

393 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

394 verbose_stacktrace: bool = False 

395 _literalStringClass: type = None # type: ignore[assignment] 

396 

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of whitespace characters.

    Stores ``chars`` as ``ParserElement.DEFAULT_WHITE_CHARS`` and also
    applies it to every built-in expression defined in this module that
    still copies the default whitespace characters.

    Example:

    .. doctest::

        # default whitespace chars are space, <TAB>, newline and carriage return
        >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
        ParseResults(['abc', 'def', 'ghi', 'jkl'], {})

        # change to just treat newline as significant
        >>> ParserElement.set_default_whitespace_chars(" \t")
        >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
        ParseResults(['abc', 'def'], {})

        # Reset to default
        >>> ParserElement.set_default_whitespace_chars(" \n\t\r")
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # update whitespace all parse expressions defined in this module
    for builtin_expr in _builtin_exprs:
        if not builtin_expr.copyDefaultWhiteChars:
            continue
        builtin_expr.whiteChars = set(chars)

424 

425 @staticmethod 

426 def inline_literals_using(cls: type) -> None: 

427 """ 

428 Set class to be used for inclusion of string literals into a parser. 

429 

430 Example: 

431 

432 .. doctest:: 

433 :options: +NORMALIZE_WHITESPACE 

434 

435 # default literal class used is Literal 

436 >>> integer = Word(nums) 

437 >>> date_str = ( 

438 ... integer("year") + '/' 

439 ... + integer("month") + '/' 

440 ... + integer("day") 

441 ... ) 

442 

443 >>> date_str.parse_string("1999/12/31") 

444 ParseResults(['1999', '/', '12', '/', '31'], 

445 {'year': '1999', 'month': '12', 'day': '31'}) 

446 

447 # change to Suppress 

448 >>> ParserElement.inline_literals_using(Suppress) 

449 >>> date_str = ( 

450 ... integer("year") + '/' 

451 ... + integer("month") + '/' 

452 ... + integer("day") 

453 ... ) 

454 

455 >>> date_str.parse_string("1999/12/31") 

456 ParseResults(['1999', '12', '31'], 

457 {'year': '1999', 'month': '12', 'day': '31'}) 

458 

459 # Reset 

460 >>> ParserElement.inline_literals_using(Literal) 

461 """ 

462 ParserElement._literalStringClass = cls 

463 

@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Generate ``cls(obj, **class_kwargs)`` for each ``obj`` in ``seq``,
    one instance at a time.

    Example:

    .. testcode::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    .. versionadded:: 3.1.0
    """
    for item in seq:
        yield cls(item, **class_kwargs)

478 

class DebugActions(NamedTuple):
    """Triple of optional debugging callbacks held by a parser element."""

    # callback for the start of a match attempt, or None
    debug_try: typing.Optional[DebugStartAction]
    # callback for a successful match, or None
    debug_match: typing.Optional[DebugSuccessAction]
    # callback for a failed match, or None
    debug_fail: typing.Optional[DebugExceptionAction]

483 

484 def __init__(self, savelist: bool = False) -> None: 

485 self.parseAction: list[ParseAction] = list() 

486 self.failAction: typing.Optional[ParseFailAction] = None 

487 self.customName: str = None # type: ignore[assignment] 

488 self._defaultName: typing.Optional[str] = None 

489 self.resultsName: str = None # type: ignore[assignment] 

490 self.saveAsList = savelist 

491 self.skipWhitespace = True 

492 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 

493 self.copyDefaultWhiteChars = True 

494 # used when checking for left-recursion 

495 self._may_return_empty = False 

496 self.keepTabs = False 

497 self.ignoreExprs: list[ParserElement] = list() 

498 self.debug = False 

499 self.streamlined = False 

500 # optimize exception handling for subclasses that don't advance parse index 

501 self.mayIndexError = True 

502 self.errmsg: Union[str, None] = "" 

503 # mark results names as modal (report only last) or cumulative (list all) 

504 self.modalResults = True 

505 # custom debug actions 

506 self.debugActions = self.DebugActions(None, None, None) 

507 # avoid redundant calls to preParse 

508 self.callPreparse = True 

509 self.callDuringTry = False 

510 self.suppress_warnings_: list[Diagnostics] = [] 

511 self.show_in_diagram = True 

512 

513 @property 

514 def mayReturnEmpty(self): 

515 return self._may_return_empty 

516 

517 @mayReturnEmpty.setter 

518 def mayReturnEmpty(self, value): 

519 self._may_return_empty = value 

520 

def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
    """
    Record that a particular diagnostic warning should not be emitted for
    this expression, and return the expression itself so that calls can be
    chained.

    Example:

    .. doctest::

        >>> label = pp.Word(pp.alphas)

        # Normally using an empty Forward in a grammar
        # would print a warning, but we can suppress that
        >>> base = pp.Forward().suppress_warning(
        ...     pp.Diagnostics.warn_on_parse_using_empty_Forward)

        >>> grammar = base | label
        >>> print(grammar.parse_string("x"))
        ['x']
    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self

542 

def visit_all(self):
    """Generator that walks a grammar, yielding this expression and every
    sub-expression reachable through ``recurse()``, each one exactly once.
    Typically just for internal use.
    """
    pending = deque([self])
    visited = set()
    while pending:
        node = pending.popleft()

        # the visited set keeps recursive grammars from looping forever
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node

559 

560 def copy(self) -> ParserElement: 

561 """ 

562 Make a copy of this :class:`ParserElement`. Useful for defining 

563 different parse actions for the same parsing pattern, using copies of 

564 the original parse element. 

565 

566 Example: 

567 

568 .. testcode:: 

569 

570 integer = Word(nums).set_parse_action( 

571 lambda toks: int(toks[0])) 

572 integerK = integer.copy().add_parse_action( 

573 lambda toks: toks[0] * 1024) + Suppress("K") 

574 integerM = integer.copy().add_parse_action( 

575 lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

576 

577 print( 

578 (integerK | integerM | integer)[1, ...].parse_string( 

579 "5K 100 640K 256M") 

580 ) 

581 

582 prints: 

583 

584 .. testoutput:: 

585 

586 [5120, 100, 655360, 268435456] 

587 

588 Equivalent form of ``expr.copy()`` is just ``expr()``: 

589 

590 .. testcode:: 

591 

592 integerM = integer().add_parse_action( 

593 lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

594 """ 

595 cpy = copy.copy(self) 

596 cpy.parseAction = self.parseAction[:] 

597 cpy.ignoreExprs = self.ignoreExprs[:] 

598 if self.copyDefaultWhiteChars: 

599 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 

600 return cpy 

601 

def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> ParserElement:
    """
    Assign a name under which the matched tokens can be looked up as a
    nested attribute of the returned parse results.

    Results names behave like dict keys by default: a later value replaces
    any earlier one. To keep every value captured under a name, call
    ``set_results_name`` with ``list_all_matches`` = True.

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    The abbreviated syntax ``expr("name")`` can be used in place of
    ``expr.set_results_name("name")`` - see :meth:`__call__`. If
    ``list_all_matches`` is required, use ``expr("name*")``.

    ``listAllMatches`` is a keyword-only alternative spelling of
    ``list_all_matches``.

    Example:

    .. testcode::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    return self._setResultsName(name, listAllMatches or list_all_matches)

637 

638 def _setResultsName(self, name, list_all_matches=False) -> ParserElement: 

639 if name is None: 

640 return self 

641 newself = self.copy() 

642 if name.endswith("*"): 

643 name = name[:-1] 

644 list_all_matches = True 

645 newself.resultsName = name 

646 newself.modalResults = not list_all_matches 

647 return newself 

648 

def set_break(self, break_flag: bool = True) -> ParserElement:
    """
    Enable or disable dropping into the Python pdb debugger just before
    this element is parsed. Pass ``break_flag`` as ``True`` to enable,
    ``False`` to disable.

    Enabling replaces ``self._parse`` with a wrapper that remembers the
    previous parse method; disabling restores the remembered method if the
    current one is such a wrapper. Returns ``self``.
    """
    if break_flag:
        wrapped_parse = self._parse

        def breaker(instring, loc, do_actions=True, callPreParse=True):
            # this call to breakpoint() is intentional, not a checkin error
            breakpoint()
            return wrapped_parse(instring, loc, do_actions, callPreParse)

        breaker._originalParseMethod = wrapped_parse  # type: ignore [attr-defined]
        self._parse = breaker  # type: ignore [method-assign]
        return self

    if hasattr(self._parse, "_originalParseMethod"):
        self._parse = self._parse._originalParseMethod  # type: ignore [method-assign]
    return self

668 

def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Replace this expression's parse actions with the given callables, to be
    run when the expression matches successfully.

    Parse actions can be called to perform data conversions, do extra validation,
    update external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable method with 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The parsed tokens are passed to the parse action as ParseResults. They can be
    modified in place using list-style append, extend, and pop operations to update
    the parsed list elements; and with dictionary-style item set and del operations
    to add, update, or remove any named results. If the tokens are modified in place,
    it is not necessary to return them with a return statement.

    Parse actions can also completely replace the given tokens, with another ``ParseResults``
    object, or with some entirely different object (common for parse actions that perform data
    conversions). A convenient way to build a new parse result is to define the values
    using a dict, and then create the return value using :class:`ParseResults.from_dict`.

    If None is passed as the ``fn`` parse action, all previously added parse actions for this
    expression are cleared.

    Raises ``TypeError`` if any of the given actions is not callable.

    Optional keyword arguments:

    :param call_during_try: (default= ``False``) indicate if parse action
                            should be run during lookaheads and alternate
                            testing. For parse actions that have side
                            effects, it is important to only call the parse
                            action once it is determined that it is being
                            called as part of a successful parse.
                            For parse actions that perform additional
                            validation, then ``call_during_try`` should
                            be passed as True, so that the validation code
                            is included in the preliminary "try" parses.
                            The legacy spelling ``callDuringTry`` is also
                            accepted.

    .. Note::
        The default parsing behavior is to expand tabs in the input string
        before starting the parsing process.
        See :meth:`parse_string` for more information on parsing strings
        containing ``<TAB>`` s, and suggested methods to maintain a
        consistent view of the parsed string, the parse location, and
        line and column positions within the parsed string.

    Example: Parse dates in the form ``YYYY/MM/DD``
    -----------------------------------------------

    Setup code:

    .. testcode::

        def convert_to_int(toks):
            '''a parse action to convert toks from str to int
            at parse time'''
            return int(toks[0])

        def is_valid_date(instring, loc, toks):
            '''a parse action to verify that the date is a valid date'''
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

    Successful parse - note that integer fields are converted to ints:

    .. testcode::

        print(date_str.parse_string("1999/12/31"))

    prints:

    .. testoutput::

        [1999, '/', 12, '/', 31]

    Failure - invalid date:

    .. testcode::

        date_str.parse_string("1999/13/31")

    prints:

    .. testoutput::

        Traceback (most recent call last):
        ParseException: invalid date given, found '1999' ...
    """
    # a lone None means "remove all parse actions"
    if fns == (None,):
        self.parseAction.clear()
        return self

    for candidate in fns:
        if not callable(candidate):
            raise TypeError("parse actions must be callable")

    trimmed_actions = [_trim_arity(fn) for fn in fns]
    self.parseAction[:] = trimmed_actions

    legacy_setting = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_setting)

    return self

782 

def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Append one or more parse actions to this expression's existing list of
    parse actions. See :class:`set_parse_action`.

    ``call_during_try`` (or the legacy ``callDuringTry``) can turn the
    expression's ``callDuringTry`` setting on; an already-true setting is
    left as it is.

    See examples in :class:`copy`.
    """
    # build the whole list first so a failure leaves parseAction untouched
    new_actions = [_trim_arity(fn) for fn in fns]
    self.parseAction.extend(new_actions)

    requested = kwargs.get("call_during_try", kwargs.get("callDuringTry", False))
    self.callDuringTry = self.callDuringTry or requested
    return self

794 

def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement:
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception;
      when omitted (or None), the default message of
      :func:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Returns ``self``.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        >>> year_int = integer.copy().add_condition(
        ...     lambda toks: toks[0] >= 2000,
        ...     message="Only support years 2000 and later")
        >>> date_str = year_int + '/' + integer + '/' + integer

        >>> result = date_str.parse_string("1999/12/31")
        Traceback (most recent call last):
        ParseException: Only support years 2000 and later...
    """
    # Keep an omitted message as None so that condition_as_parse_action can
    # substitute its default text; str(None) would produce the literal "None".
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self

836 

def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
    """
    Register the action to run when parsing fails at this expression.
    The fail action ``fn`` is a callable that takes the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns ``self``, replacing any previously set fail action."""
    self.failAction = fn
    return self

852 

853 def _skipIgnorables(self, instring: str, loc: int) -> int: 

854 if not self.ignoreExprs: 

855 return loc 

856 exprsFound = True 

857 ignore_expr_fns = [e._parse for e in self.ignoreExprs] 

858 last_loc = loc 

859 while exprsFound: 

860 exprsFound = False 

861 for ignore_fn in ignore_expr_fns: 

862 try: 

863 while 1: 

864 loc, dummy = ignore_fn(instring, loc) 

865 exprsFound = True 

866 except ParseException: 

867 pass 

868 # check if all ignore exprs matched but didn't actually advance the parse location 

869 if loc == last_loc: 

870 break 

871 last_loc = loc 

872 return loc 

873 

def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should actually begin.

    Starting from ``loc``, first skips any ignorable expressions (when
    ``ignoreExprs`` is non-empty), then, if ``skipWhitespace`` is set, skips
    characters that are in ``whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    end = len(instring)
    skippable = self.whiteChars
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc

885 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Base implementation: match nothing, returning ``loc`` unchanged together with an empty token list."""
    no_tokens = []
    return loc, no_tokens

888 

def postParse(self, instring, loc, tokenlist):
    """Base implementation: hand back ``tokenlist`` as given, with no post-processing."""
    result = tokenlist
    return result

891 

892 # @profile 

def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Match this expression at ``loc`` without consulting the packrat cache.

    Steps: optionally pre-parse (``preParse``), call ``parseImpl``, pass the
    tokens through ``postParse``, wrap them in a :class:`ParseResults`, and
    run any attached parse actions.

    Parameters:

    - ``instring`` - the string being parsed
    - ``loc`` - location at which to attempt the match
    - ``do_actions`` - forwarded to ``parseImpl``; parse actions run only
      when this is true or ``self.callDuringTry`` is set
    - ``callPreParse`` - ``preParse`` is called only when both this flag
      and ``self.callPreparse`` are true

    Returns a ``(loc, tokens)`` tuple: the location after the match and the
    resulting :class:`ParseResults`.

    Raises :class:`ParseException` if ``parseImpl`` raises ``IndexError``,
    or if a parse action raises ``IndexError``; other exceptions propagate
    (after the debug/fail hooks have been called, when enabled).
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # Instrumented path: same steps as the plain path below, wrapped so
        # that the debug_try/debug_fail hooks and the fail action can run.
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                # parseImpl may run off the end of the string here; report
                # that as an ordinary parse failure at end of input
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            # NOTE(review): if preParse itself raised, tokens_start has not
            # been bound yet when it is read below - confirm this cannot happen.
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # Plain path: no debug hooks and no fail action to call.
        # (presumably kept separate so the common case avoids the extra
        # try/except wrapper - TODO confirm)
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            # Same parse-action loop as the non-debug branch below, wrapped
            # so that debug_fail is notified of any exception.
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        # an IndexError inside a parse action is reported as
                        # a ParseException chained to the original error
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # an action returning a new value replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # an action returning a new value replaces the results
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

993 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the end location.

    A :class:`ParseFatalException` is re-raised as-is when ``raise_fatal``
    is true; otherwise it is replaced by a plain :class:`ParseException`
    located at ``loc``. Parse actions are skipped unless ``do_actions``
    is true.
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    else:
        return outcome[0]

1008 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    Returns ``False`` when the attempt raises :class:`ParseException` or
    ``IndexError``, ``True`` otherwise.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True

1016 

# cache for left-recursion in Forward references
# recursion_lock is a re-entrant lock (threading.RLock).
# recursion_memos starts out as a plain dict; per its annotation it maps
# (int, Forward, bool) keys to (int, ParseResults-or-Exception) values.
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}

1022 

class _CacheType(typing.Protocol):
    """
    Structural interface for the objects used for packrat and
    left-recursion caching of results and exceptions.
    """

    # sentinel that ``get`` hands back for a key that has no entry
    not_in_cache: bool

    def get(self, *args) -> typing.Any:
        """Look up a cached value."""
        ...

    def set(self, *args) -> None:
        """Store a value in the cache."""
        ...

    def clear(self) -> None:
        """Discard all cached values."""
        ...

1036 

class NullCache(dict):
    """
    Placeholder cache used to initialize the ``packrat_cache`` class
    variable. If/when ``enable_packrat()`` is called, this null cache is
    replaced by a proper ``_CacheType`` class instance.

    All three cache operations are no-ops that return ``None``.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        """Ignore the arguments; nothing is ever cached."""
        return None

    def set(self, *args) -> None:
        """Ignore the arguments; nothing is stored."""
        return None

    def clear(self) -> None:
        """Do nothing."""
        return None

1051 

# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
# packrat_cache starts as a NullCache placeholder.
packrat_cache: _CacheType = NullCache()
# re-entrant lock (threading.RLock)
packrat_cache_lock = RLock()
# two counters, both starting at zero
packrat_cache_stats = [0, 0]

1057 

1058 # this method gets repeatedly called during backtracking with the same arguments - 

1059 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 

def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Packrat variant of ``_parseNoCache``: memoizes the outcome (results or
    exception) of matching this expression with the given arguments.

    On a cache miss the expression is parsed with ``_parseNoCache`` and the
    outcome is stored; on a hit the stored exception is re-raised or a copy
    of the stored results is returned. When debugging is enabled, the debug
    actions are invoked on hits with ``cache_hit=True``; a ``TypeError``
    from a debug action is ignored.

    Returns a ``(loc, tokens)`` tuple.
    """
    hit_slot, miss_slot = 0, 1
    cache_key = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        memo = ParserElement.packrat_cache
        cached = memo.get(cache_key)

        if cached is memo.not_in_cache:
            # miss: do the real work and remember how it turned out
            ParserElement.packrat_cache_stats[miss_slot] += 1
            try:
                fresh = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                memo.set(cache_key, pe.__class__(*pe.args))
                raise
            memo.set(cache_key, (fresh[0], fresh[1].copy(), loc))
            return fresh

        # hit: replay the stored outcome
        ParserElement.packrat_cache_stats[hit_slot] += 1
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(cached, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, cached, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise cached

        cached = cast(tuple[int, ParseResults, int], cached)
        # hand out a copy so callers cannot alter the cached results
        loc_, result, endloc = cached[0], cached[1].copy(), cached[2]
        if self.debug and self.debugActions.debug_match:
            try:
                self.debugActions.debug_match(
                    instring, loc_, endloc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return loc_, result

1107 

# default parse entry point: the uncached implementation
_parse = _parseNoCache

1109 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its statistics counters, and clear the
    left-recursion memo table.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        counters = ParserElement.packrat_cache_stats
        # slice-assign so the same list object is zeroed in place
        counters[:] = [0] * len(counters)
        ParserElement.recursion_memos.clear()

1118 

# class attributes to keep caching status
# both memoization modes start out disabled
_packratEnabled = False
_left_recursion_enabled = False

1122 

@staticmethod
def disable_memoization() -> None:
    """
    Disables active Packrat or Left Recursion parsing and their memoization

    This method also works if neither Packrat nor Left Recursion is enabled.
    This makes it safe to call before activating Packrat or Left Recursion
    to clear any previous settings.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.reset_cache()
        ParserElement._left_recursion_enabled = False
        ParserElement._packratEnabled = False
        # route parsing back through the uncached implementation
        ParserElement._parse = ParserElement._parseNoCache

1137 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Enables "bounded recursion" parsing, which allows for both direct and indirect
    left-recursion. During parsing, left-recursive :class:`Forward` elements are
    repeatedly matched with a fixed recursion depth that is gradually increased
    until finding the longest match.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    Recursion search naturally memoizes matches of ``Forward`` elements and may
    thus skip reevaluation of parse actions during backtracking. This may break
    programs with parse actions which rely on strict ordering of side-effects.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing works similar but not identical to Packrat parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.

    Raises ``RuntimeError`` if packrat parsing is enabled and ``force`` is
    not set, and ``NotImplementedError`` for a ``cache_size_limit`` that is
    zero or negative.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo_table = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo_table = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo_table  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True

1195 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similar but not identical to Bounded Recursion parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.

    Raises ``RuntimeError`` if Bounded Recursion is enabled and ``force``
    is not set. Calling this while packrat is already enabled is a no-op.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if ParserElement._packratEnabled:
            return

        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        # route all parsing through the memoizing implementation
        ParserElement._parse = ParserElement._parseCache

1247 

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This function is intended as the primary interface to the
    client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This
    is also equivalent to ending the grammar with :class:`StringEnd`\\ ().

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    must_consume_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, results = self._parse(instring, 0)
        if must_consume_all:
            # only ignorables/whitespace may remain after the match
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)

    return results

1321 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found.  If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Parameters:

    - ``instring`` - the string to scan
    - ``max_matches`` - stop after this many matches
    - ``overlap`` - if true, report overlapping matches
    - ``always_skip_whitespace`` - if true (the default), leading text is
      skipped using a separate ``Empty()`` that carries this expression's
      ignore expressions and whitespace characters; otherwise this
      expression's own ``preParse`` is used
    - ``debug`` - if true, print a dict with the tokens, start and end of
      each match before yielding it
    - ``maxMatches`` - pre-PEP8 spelling of ``max_matches``; the smaller of
      the two values is used

    Yields ``(tokens, start, end)`` tuples.

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    # use the PEP8 name, consistent with parse_string()
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character past the pre-parsed spot
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): `nextloc` (lowercase) is the
                        # pre-parsed position from `loc`, distinct from the
                        # match end `nextLoc`; logic kept exactly as found.
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc: step forward
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1423 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string`, to modify matching text with modified tokens that may
    be returned from a parse action.  To use ``transform_string``, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking ``transform_string()`` on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  ``transform_string()`` returns the resulting transformed string.

    Note that calling this method sets ``keepTabs`` to ``True`` on this
    expression.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
        made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
        Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    cursor = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy through the unmatched text that precedes this match
            if start > cursor:
                pieces.append(instring[cursor:start])
            cursor = end

            if toks:
                if isinstance(toks, ParseResults):
                    pieces += toks.as_list()
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)

        # trailing text after the final match
        pieces.append(instring[cursor:])
        non_empty = [piece for piece in pieces if piece]
        return "".join([str(item) for item in _flatten(non_empty)])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1482 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    ``max_matches`` argument, to clip searching after 'n' matches are found.

    ``maxMatches`` is the pre-PEP8 spelling of ``max_matches``; the smaller
    of the two values is used.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        scanner = self.scan_string(
            instring, limit, always_skip_whitespace=False, debug=debug
        )
        # keep only the tokens of each match, dropping the locations
        found = [toks for toks, _start, _end in scanner]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1536 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``include_separators`` argument (default= ``False``), if the separating
    matching text should be included in the split results.

    ``includeSeparators`` is the pre-PEP8 spelling of ``include_separators``;
    separators are included if either is true.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        # text between the previous separator and this one
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the last separator
    yield instring[cursor:]

1573 

def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement`
    converts them to :class:`Literal`\\ s by default.

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1617 

def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`

    A leading ``...`` becomes a :class:`SkipTo` this expression (named
    ``"_skipped*"``) followed by this expression.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1630 

def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1640 

def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1650 

def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    ``...`` is accepted wherever ``None`` is. Raises ``ValueError`` for a
    negative count or a tuple whose second value is less than its first.
    """
    # normalize Ellipsis forms into (min, max) tuples
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        required, optional = other, 0
    elif isinstance(other, tuple):
        bounds = tuple(None if item is Ellipsis else item for item in other)
        lo, hi = (bounds + (None, None))[:2]
        if lo is None:
            lo = 0
        if not isinstance(lo, int):
            return NotImplemented
        if hi is None:
            # open-ended repetition
            if lo == 0:
                return ZeroOrMore(self)
            if lo == 1:
                return OneOrMore(self)
            return self * lo + ZeroOrMore(self)
        if not isinstance(hi, int):
            return NotImplemented
        required, optional = lo, hi - lo
    else:
        return NotImplemented

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    if not optional:
        return self if required == 1 else And([self] * required)

    def optional_chain(n):
        # nested Opt(self + Opt(self + ...)), n levels deep
        return Opt(self + optional_chain(n - 1)) if n > 1 else Opt(self)

    if not required:
        return optional_chain(optional)
    if required == 1:
        return self + optional_chain(optional)
    return And([self] * required) + optional_chain(optional)

1730 

def __rmul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator when left operand is not a
    :class:`ParserElement`; delegates to ``__mul__``.
    """
    product = self.__mul__(other)
    return product

1733 

def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        if other == "":
            # `expr | ""` is equivalent to `Opt(expr)`
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented

1752 

def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1762 

def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1772 

def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1782 

def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1792 

def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1802 

def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny`
    """
    negated = NotAny(self)
    return negated

1808 

# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence
# (with __iter__ set to None, iter(expr) raises TypeError, so the
# repetition-building __getitem__ is never used for iteration)
__iter__ = None

1812 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` if more than two index arguments are given.

    .. versionchanged:: 3.1.0
       Support for slice notation.
    """

    stop_on_defined = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[reps: end_expr] - the slice start carries the repetition spec and
        # the slice stop carries the stop_on expression
        key, stop_on = key.start, key.stop
        if key is None:
            # expr[: end_expr] - a missing repetition spec is treated as ``...``
            key = ...
        stop_on_defined = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[m, n: end_expr] arrives as (m, slice(n, end_expr))
        key, stop_on = (key[0], key[1].start), key[1].stop
        stop_on_defined = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # a single non-iterable key k is passed on as the pair (k, k)
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    ret = self * tuple(key[:2])
    # cast is for static type checkers only; it has no runtime effect
    ret = typing.cast(_MultipleMatch, ret)

    if stop_on_defined:
        ret.stopOn(stop_on)

    return ret

1874 

1875 def __call__(self, name: typing.Optional[str] = None) -> ParserElement: 

1876 """ 

1877 Shortcut for :class:`set_results_name`, with ``list_all_matches=False``. 

1878 

1879 If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be 

1880 passed as ``True``. 

1881 

1882 If ``name`` is omitted, same as calling :class:`copy`. 

1883 

1884 Example: 

1885 

1886 .. testcode:: 

1887 

1888 # these are equivalent 

1889 userdata = ( 

1890 Word(alphas).set_results_name("name") 

1891 + Word(nums + "-").set_results_name("socsecno") 

1892 ) 

1893 

1894 userdata = Word(alphas)("name") + Word(nums + "-")("socsecno") 

1895 """ 

1896 if name is not None: 

1897 return self._setResultsName(name) 

1898 

1899 return self.copy() 

1900 

def suppress(self) -> ParserElement:
    """
    Wrap this :class:`ParserElement` in a :class:`Suppress` so that its output is
    suppressed; useful to keep punctuation from cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed

1907 

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in
       child elements (if any); this implementation only sets the flag on ``self``
       and does not use the argument
    :returns: ``self``, so calls can be chained
    """
    setattr(self, "skipWhitespace", True)
    return self

1917 

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in
       child elements (if any); this implementation only clears the flag on ``self``
       and does not use the argument
    :returns: ``self``, so calls can be chained
    """
    setattr(self, "skipWhitespace", False)
    return self

1928 

def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars for this expression.

    :param chars: the characters to treat as whitespace; stored as a ``set``
    :param copy_defaults: value stored in ``copyDefaultWhiteChars``
    :returns: ``self``, so calls can be chained

    Whitespace skipping is also switched on.
    """
    white_chars = set(chars)
    self.skipWhitespace = True
    self.whiteChars = white_chars
    self.copyDefaultWhiteChars = copy_defaults
    return self

1939 

def parse_with_tabs(self) -> ParserElement:
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before parsing
    the input string, by setting the ``keepTabs`` flag.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    :returns: ``self``, so calls can be chained
    """
    setattr(self, "keepTabs", True)
    return self

1948 

def ignore(self, other: ParserElement) -> ParserElement:
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in :class:`Suppress`.  A :class:`Suppress` expression is
    added as-is unless it is already in the ignore list; any other expression is
    copied, wrapped in :class:`Suppress`, and added.

    Example:

    .. doctest::

       >>> patt = Word(alphas)[...]
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj']

       >>> patt = Word(alphas)[...].ignore(c_style_comment)
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj', 'lskjd']
    """
    ignorable = Suppress(other) if isinstance(other, str_type) else other

    if not isinstance(ignorable, Suppress):
        self.ignoreExprs.append(Suppress(ignorable.copy()))
    elif ignorable not in self.ignoreExprs:
        self.ignoreExprs.append(ignorable)
    return self

1976 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching, and
    enable debugging for this expression.  Any action given as a false value
    (such as ``None``) is replaced by the corresponding default debug action.

    :param start_action: method to be called when an expression is about to be parsed;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
       should have the signature::

          fn(input_string: str,
             start_location: int,
             end_location: int,
             expression: ParserElement,
             parsed_tokens: ParseResults,
             cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             exception: Exception,
             cache_hit: bool)
    """
    make_actions = self.DebugActions
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = make_actions(on_start, on_success, on_exception)
    self.debug = True
    return self

2020 

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on every expression returned
    by ``visit_all()`` (this expression and all sub-expressions).

    Example:

    .. testcode::

       wd = Word(alphas).set_name("alphaword")
       integer = Word(nums).set_name("numword")
       term = wd | integer

       # turn on debugging for wd
       wd.set_debug()

       term[1, ...].parse_string("abc 123 xyz 890")

    Enabling debug installs the default debug actions - custom debug actions can be
    specified using :meth:`set_debug_actions`.  With the default actions, prior to
    attempting to match the ``wd`` expression, the debugging message
    ``"Match <exprname> at loc <n>(<line>,<col>)"`` is shown.  Then if the parse
    succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown.  Also note the use of :meth:`set_name` to assign a
    human-readable name to the expression, which makes debugging and exception
    messages easier to understand - for instance, the default name created for the
    :class:`Word` expression without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if not recurse:
        if not flag:
            self.debug = False
        else:
            self.set_debug_actions(
                _default_start_debug_action,
                _default_success_debug_action,
                _default_exception_debug_action,
            )
        return self

    # each visited expression is updated non-recursively, since visit_all()
    # already supplies the sub-expressions
    for sub_expr in self.visit_all():
        sub_expr.set_debug(flag, recurse=False)
    return self

2090 

@property
def default_name(self) -> str:
    """
    The auto-generated name for this expression, built on first access by
    ``_generateDefaultName()`` and cached in ``_defaultName``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._generateDefaultName()
        self._defaultName = cached
    return self._defaultName

2096 

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Return the auto-generated name for this expression.

    Child classes must define this method, which defines how the ``default_name`` is set;
    the ``default_name`` property calls it when no generated name is cached yet.
    """

2102 

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Define name for this expression, makes debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example:

    .. doctest::

       >>> integer = Word(nums)
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

       >>> integer.set_name("integer")
       integer
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # str(self) is evaluated after customName is assigned, so the message
    # reflects the new name
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        has_custom_name = name is not None
        self.set_debug(has_custom_name)

    return self

2137 

@property
def name(self) -> str:
    """
    The user-defined name if one has been set, otherwise the auto-generated
    ``default_name``.  Assigning to this property calls :meth:`set_name`.
    """
    # Only ``None`` means "no custom name"; any other value is returned as-is
    if self.customName is None:
        return self.default_name
    return self.customName

@name.setter
def name(self, new_name) -> None:
    self.set_name(new_name)

2146 

2147 def __str__(self) -> str: 

2148 return self.name 

2149 

2150 def __repr__(self) -> str: 

2151 return str(self) 

2152 

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and discard the cached default name.

    :returns: ``self``, so calls can be chained
    """
    # clearing _defaultName makes the default_name property regenerate it on next access
    self._defaultName = None
    self.streamlined = True
    return self

2157 

def recurse(self) -> list[ParserElement]:
    """Return the sub-expressions of this expression; this implementation returns none."""
    return list()

2160 

2161 def _checkRecursion(self, parseElementList): 

2162 subRecCheckList = parseElementList[:] + [self] 

2163 for e in self.recurse(): 

2164 e._checkRecursion(subRecCheckList) 

2165 

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Emits a :class:`DeprecationWarning` and then runs ``_checkRecursion`` starting
    from an empty list.  ``validateTrace`` is accepted but not used.
    """
    deprecation_msg = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

2180 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    :param file_or_filename: an object with a ``read()`` method, or a path to open
    :param encoding: text encoding used when a path is opened (ignored for file objects)
    :param parse_all: passed on to ``parse_string``; the legacy ``parseAll`` keyword
       is OR-ed with it
    :returns: the value returned by ``parse_string``
    :raises ParseBaseException: if parsing fails; unless
       ``ParserElement.verbose_stacktrace`` is set, the exception is re-raised with
       its traceback cleared
    """
    parseAll = parseAll or parse_all

    # Decide "file object or filename" by looking for a ``read`` attribute rather
    # than wrapping the read() call in ``try/except AttributeError``: that way an
    # AttributeError raised *inside* a file object's read() propagates unchanged
    # instead of being mistaken for "this is a filename".
    reader = getattr(file_or_filename, "read", None)
    if reader is not None:
        file_contents = reader()
    else:
        file_or_filename = typing.cast(str, file_or_filename)
        with open(file_or_filename, "r", encoding=encoding) as f:
            file_contents = f.read()

    try:
        return self.parse_string(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

2210 

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object compares equal
    - a string compares equal if this expression matches it in full
      (``self.matches(other, parse_all=True)``)
    - another :class:`ParserElement` compares equal if both have equal instance
      attributes (``vars()``)
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if not isinstance(other, ParserElement):
        return False
    return vars(self) == vars(other)

2219 

2220 def __hash__(self): 

2221 return id(self) 

2222 

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match; converted
       with ``str()`` before parsing
    :param parse_all: flag to pass to :meth:`parse_string` when running tests; the
       legacy ``parseAll`` keyword is AND-ed with it
    :returns: ``True`` if parsing succeeds, ``False`` if a ``ParseBaseException``
       is raised

    Example:

    .. doctest::

       >>> expr = Word(nums)
       >>> expr.matches("100")
       True
    """
    require_full_match = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_full_match)
    except ParseBaseException:
        return False
    else:
        return True

2247 

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are legacy spellings that are combined with their
    snake_case counterparts.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Passing example:

    .. testcode::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            # negative decimal number without leading digit
            -.100
            ''')
        print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        # negative decimal number without leading digit
        -.100
        [-0.1]
        Success

    Failure-test example:

    .. testcode::

        result = number_expr.run_tests('''
            # stray character
            100Z
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # stray character
        100Z
        100Z
           ^
        ParseException: Expected end of text, found 'Z' ...

        # too many '.'
        3.14.159
        3.14.159
            ^
        ParseException: Expected end of text, found '.' ...
        FAIL: Expected end of text, found '.' ...
        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

        expr = Word(alphanums)[1,...]
        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE
        :hide:


        this is a test\\n of strings that spans \\n 3 lines
        ['this', 'is', 'a', 'test', 'of', 'strings', 'that', 'spans', '3', 'lines']

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the legacy camelCase keywords with their snake_case equivalents:
    # flags defaulting to True are AND-ed, the others are OR-ed
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a single multiline string is split into one stripped test per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    # comment lines collected since the last test; they are emitted with the next test
    comments: list[str] = []
    success = True
    # matches a literal backslash-n in a test string and replaces it with a newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # a line is held back as a comment if it matches the comment expression, or
        # if it is blank and follows already-collected comment lines
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # a parse failure only counts as success when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame following the matching one is reported as the parse action
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    # a failing post-parse callback is reported in the output
                    # instead of aborting the test run
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2489 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create/overwrite the file and write the HTML into it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2551 

# Compatibility synonyms
# Each pre-PEP8 camelCase name is bound to the result of replaced_by_pep8() for the
# matching snake_case method; the first six are additionally wrapped in staticmethod().
# ``defaultName`` is a plain alias of the ``default_name`` property.
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
defaultName = default_name
# fmt: on

2586 

2587 

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render "<anchor> + Empty" and show the Empty as the pending '...'
        placeholder_repr = str(self.anchor + Empty())
        return placeholder_repr.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # the skipped text is collected under the "_skipped" results name
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        # NOTE: the names of these two parse actions are left unchanged, since
        # function names can appear in diagnostic output

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the first token
            # and the "_skipped" entry
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the anchor was not found and nothing was skipped: record what is missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never combined with a following expression cannot parse
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2627 

2628 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self) -> None:
        # tokens are always initialized with savelist=False
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name of a token is the name of its concrete class
        cls = type(self)
        return cls.__name__

2639 

2640 

class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditionally fail at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2654 

2655 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example:

    .. doctest::

       >>> Literal('abc').parse_string('abc')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('abcdef')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('ab')
       Traceback (most recent call last):
       ParseException: Expected 'abc', found 'ab' (at char 0), (line: 1, col: 1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: select a subclass with optimized parseImpl.
        # Only a direct ``Literal(...)`` call is redirected; subclasses are
        # instantiated as themselves.
        target_cls = cls
        if cls is Literal:
            text = matchString or match_string
            if not text:
                target_cls = Empty
            elif len(text) == 1:
                target_cls = _SingleCharLiteral

        return super().__new__(target_cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        super().__init__()
        # the legacy ``matchString`` keyword takes precedence when it is non-empty
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        # self.name relies on self.match, so this must follow the assignments above
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before doing the full startswith() comparison
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2713 

2714 

class Empty(Literal):
    """
    A token that matches nothing-at-all and therefore always succeeds,
    consuming no input and contributing no tokens.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # both arguments are accepted for signature compatibility with
        # Literal but ignored: the match text is always the empty string
        super().__init__(match_string="")
        self._may_return_empty = True
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Return the fixed default name ``"Empty"``."""
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed at ``loc`` without advancing; the token list is empty."""
        no_tokens: list = []
        return loc, no_tokens

2730 

2731 

class _SingleCharLiteral(Literal):
    """Literal whose match text is one character; ``Literal.__new__``
    allocates this class for one-character match strings."""

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the single character at ``loc``; advance by one on success."""
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2737 

2738 

# Store Literal as ParserElement._literalStringClass.
# NOTE(review): presumably this is the class ParserElement uses to turn
# bare strings into expressions - confirm against ParserElement.
ParserElement._literalStringClass = Literal

2740 

2741 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> Keyword("start").parse_string("start")
        ParseResults(['start'], {})
        >>> Keyword("start").parse_string("starting")
        Traceback (most recent call last):
        ParseException: Expected Keyword 'start', keyword was immediately
        followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> Keyword("start").parse_string("starting").debug()
        Traceback (most recent call last):
        ParseException: Expected Keyword "start", keyword was immediately
        followed by keyword character, found 'ing' ...

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    # characters regarded as "part of a word" when ident_chars is not given;
    # replaceable at runtime through set_default_keyword_chars()
    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        """Create a keyword matcher.

        ``matchString`` and ``identChars`` are the legacy camelCase
        spellings of ``match_string`` and ``ident_chars``; a truthy legacy
        value takes precedence.

        :raises ValueError: if the keyword string is empty.
        """
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text, so both the
            # keyword and the identifier characters are stored upper-cased
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        """Use the ``repr`` of the keyword string as the default name."""
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the keyword at ``loc`` and verify its word boundaries.

        Returns ``(end_loc, self.match)`` when the keyword text is present
        at ``loc`` and neither the character before nor the character after
        it is an identifier character.

        :raises ParseException: at ``loc`` when the text does not match; at
            ``loc - 1`` when the keyword is preceded by an identifier
            character; at the end of the keyword when it is followed by one.
            The boundary messages are the same in caseless and
            case-sensitive mode.
        """
        match_end = loc + self.matchLen
        caseless = self.caseless

        if caseless:
            text_matches = instring[loc:match_end].upper() == self.caselessmatch
        else:
            # one-character keywords need only the first-character test;
            # longer ones fall through to the full prefix comparison
            text_matches = (
                instring[loc] == self.firstMatchChar
                and self.matchLen == 1
                or instring.startswith(self.match, loc)
            )

        errmsg = self.errmsg or ""
        errloc = loc

        if text_matches:
            # "" stands for "no neighbour" (start / end of input) and is
            # never a member of identChars
            before = instring[loc - 1] if loc > 0 else ""
            after = instring[match_end] if match_end < len(instring) else ""
            if caseless:
                before, after = before.upper(), after.upper()

            ident_chars = self.identChars
            if before in ident_chars:
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            elif after in ident_chars:
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = match_end
            else:
                return match_end, self.match
        # else no match, just raise the plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2872 

2873 

class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.

    The token returned is always spelled exactly as the match string was
    given to the constructor, never as it appeared in the input.

    Example:

    .. doctest::

        >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        original_text = matchString if matchString else match_string
        # the base class stores the upper-cased text used for comparison
        super().__init__(original_text.upper())
        # remember the caller's spelling; it is what gets returned as the token
        self.returnString = original_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the upper-cased input slice at ``loc`` with the stored text."""
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2901 

2902 

class CaselessKeyword(Keyword):
    """
    :class:`Keyword` with case-insensitive matching switched on.

    Example:

    .. doctest::

        >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        # legacy camelCase keywords win when truthy; everything else is
        # delegated to Keyword with caseless forced on
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2928 

2929 

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> patt = CloseMatch("ATCATCGAATGGA")
        >>> patt.parse_string("ATCATCGAAXGGA")
        ParseResults(['ATCATCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

        >>> patt.parse_string("ATCAXCGAAXGGA")
        Traceback (most recent call last):
        ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
        found 'ATCAXCGAAXGGA' (at char 0), (line:1, col:1)

        # exact match
        >>> patt.parse_string("ATCATCGAATGGA")
        ParseResults(['ATCATCGAATGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': []})

        # close match allowing up to 2 mismatches
        >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        >>> patt.parse_string("ATCAXCGAAXGGA")
        ParseResults(['ATCAXCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        """Create a close-match expression.

        ``maxMismatches`` is the legacy camelCase spelling of
        ``max_mismatches``; it is used only when ``max_mismatches`` is
        ``None``.
        """
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        """Default name is ``<class name>:<repr of match string>``."""
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the next ``len(match_string)`` input characters position by
        position against ``match_string``.

        Returns ``(end_loc, results)`` where ``results`` holds the matched
        input slice plus the named results ``original`` and ``mismatches``.

        :raises ParseException: at ``loc`` when fewer than
            ``len(match_string)`` characters remain, or when more than
            ``maxMismatches`` positions differ.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # not enough input left for a candidate of the required length
        if maxloc > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        caseless = self.caseless
        maxMismatches = self.maxMismatches
        mismatches = []

        for pos, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
            if caseless:
                src, mat = src.lower(), mat.lower()

            if src != mat:
                mismatches.append(pos)
                if len(mismatches) > maxMismatches:
                    raise ParseException(instring, loc, self.errmsg, self)

        # The candidate always spans start..maxloc.  Using maxloc directly
        # (rather than deriving the end from the last loop index) keeps an
        # empty match_string from consuming an input character.
        results = ParseResults([instring[start:maxloc]])
        results["original"] = match_string
        results["mismatches"] = mismatches
        return maxloc, results

3028 

3029 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0,
      meaning no maximum)
    - ``exact`` - exact number of characters to match (default=0);
      when greater than 0 it overrides ``min`` and ``max``
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

        # a word composed of digits
        integer = Word(nums)
        # Two equivalent alternate forms:
        Word("0123456789")
        Word(srange("[0-9]"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral
        # (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
        Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # the camelCase keywords are legacy spellings; a truthy legacy value
        # takes precedence over its snake_case counterpart
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            # strip the excluded characters from both character sets
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        # note: if exclusion emptied bodyChars, the initial-character set is
        # used for the body as well
        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # recorded before ``exact`` is applied, so ``exact`` alone does not
        # set this flag
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # ``exact`` overrides both bounds; the local min/max are updated too
        # because the regex construction below reads them
        if exact > 0:
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        # (not attempted when a space is in either character set)
        if " " not in (self.initChars | self.bodyChars):
            # fragment matching the first character
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # one character class, repeated for the whole word
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # leading class followed by a body class; the body repeat
                # counts are one less because the leading fragment has
                # already matched the first character
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # pattern could not be compiled; keep the character-by-character
                # parseImpl defined on the class
                self.re = None  # type: ignore[assignment]
            else:
                # instance attribute overrides the class-level parseImpl
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def copy(self) -> Word:
        """Copy this expression, re-binding the regex fast path on the copy."""
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            ret.re_match = self.re_match
            # bind parseImpl to the copy's own method rather than the original's
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        """Build a default name of the form ``W:(chars)`` with an optional
        length suffix; a single exact character drops the ``W:`` prefix."""

        def charsAsStr(s):
            # collapse the set to range notation and truncate long results
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Character-by-character matcher, used when no regex was installed.

        Returns ``(end_loc, matched_text)`` or raises :class:`ParseException`.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        # consume body characters up to the length limit or end of input
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            # too short
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # an explicit max was given and the word continues beyond it
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            # keyword mode: must not be adjacent to another body character
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed by ``__init__`` when the pattern compiles."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

3298 

3299 

class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)`` for
    matching exactly one character drawn from a given string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # legacy camelCase keywords win when truthy; the word length is
        # pinned to exactly one character
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )

3320 

3321 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ module for an
    explanation of the acceptable patterns and flags.

    ``pattern`` may also be a callable taking no arguments and returning
    a pattern string or a compiled RE; it is then called only when the
    compiled expression is first needed.

    With ``as_group_list=True`` the parse result is the tuple returned by
    the match object's ``groups()``; with ``as_match=True`` it is the match
    object itself (``as_match`` wins if both are set).

    Example:

    .. testcode::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept regular expressions compiled using the
        # re module
        import re
        parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ) -> None:
        """Record the pattern; string and callable patterns are compiled lazily.

        :raises ValueError: if ``pattern`` is an empty string.
        :raises TypeError: if ``pattern`` is not a string, a compiled RE
            object, or a callable.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        # In every branch _may_return_empty starts as None ("not yet
        # known"); it is filled in by the ``re`` property.
        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled RE (stdlib ``re`` or a compatible module)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # instance attributes override the class-level parseImpl; asMatch is
        # assigned last and therefore wins when both flags are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self) -> Regex:
        """Copy this expression, re-binding any parseImpl override to the copy."""
        ret: Regex = cast(Regex, super().copy())
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """The compiled regular expression, built on first access.

        Also determines whether the expression can match the empty
        string: always when the pattern is compiled here from a string,
        and for a precompiled pattern whenever that is still unknown.

        :raises ValueError: if the pattern string cannot be compiled.
        """
        compiled_here = False

        if not self._re:
            if callable(self.pattern):
                # replace self.pattern with the string returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

                # see if we got a compiled RE back instead of a str
                if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                    self._re = cast(re.Pattern[str], self.pattern)
                    self.pattern = self.reString = self._re.pattern

            if not self._re:
                try:
                    self._re = re.compile(self.pattern, self.flags)
                except re.error as err:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from err
                compiled_here = True

        # Test the compiled object directly; going through ``self.re`` here
        # would re-enter this property before its value has been cached.
        if compiled_here or self._may_return_empty is None:
            self._may_return_empty = self._re.match("") is not None
        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled expression."""
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """Whether this expression can match the empty string."""
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        """Default name is ``Re:(<repr of pattern>)`` with doubled
        backslashes collapsed to single ones."""
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc``; return the matched text with any named groups
        attached as named results."""
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Match at ``loc``; return the tuple from the match's ``groups()``."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Match at ``loc``; return the match object itself."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example:

        .. testcode::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))

        .. testoutput::

            <h1>main title</h1>

        :raises TypeError: if this expression was created with
            ``as_group_list=True``, or with ``as_match=True`` and ``repl``
            is a callable.
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:
            # tokens[0] is a match object here, so expand the template on it

            def pa(tokens):
                return tokens[0].expand(repl)

        else:
            # tokens[0] is the matched text; re-apply the regex substitution

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3534 

3535 

3536class QuotedString(Token): 

3537 r""" 

3538 Token for matching strings that are delimited by quoting characters. 

3539 

3540 Defined with the following parameters: 

3541 

3542 - ``quote_char`` - string of one or more characters defining the 

3543 quote delimiting string 

3544 - ``esc_char`` - character to re_escape quotes, typically backslash 

3545 (default= ``None``) 

3546 - ``esc_quote`` - special quote sequence to re_escape an embedded quote 

3547 string (such as SQL's ``""`` to re_escape an embedded ``"``) 

3548 (default= ``None``) 

3549 - ``multiline`` - boolean indicating whether quotes can span 

3550 multiple lines (default= ``False``) 

3551 - ``unquote_results`` - boolean indicating whether the matched text 

3552 should be unquoted (default= ``True``) 

3553 - ``end_quote_char`` - string of one or more characters defining the 

3554 end of the quote delimited string (default= ``None`` => same as 

3555 quote_char) 

3556 - ``convert_whitespace_escapes`` - convert escaped whitespace 

3557 (``'\t'``, ``'\n'``, etc.) to actual whitespace 

3558 (default= ``True``) 

3559 

3560 .. caution:: ``convert_whitespace_escapes`` has no effect if 

3561 ``unquote_results`` is ``False``. 

3562 

3563 Example: 

3564 

3565 .. doctest:: 

3566 

3567 >>> qs = QuotedString('"') 

3568 >>> print(qs.search_string('lsjdf "This is the quote" sldjf')) 

3569 [['This is the quote']] 

3570 >>> complex_qs = QuotedString('{{', end_quote_char='}}') 

3571 >>> print(complex_qs.search_string( 

3572 ... 'lsjdf {{This is the "quote"}} sldjf')) 

3573 [['This is the "quote"']] 

3574 >>> sql_qs = QuotedString('"', esc_quote='""') 

3575 >>> print(sql_qs.search_string( 

3576 ... 'lsjdf "This is the quote with ""embedded"" quotes" sldjf')) 

3577 [['This is the quote with "embedded" quotes']] 

3578 """ 

3579 

3580 ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r"))) 

3581 

def __init__(
    self,
    quote_char: str = "",
    esc_char: typing.Optional[str] = None,
    esc_quote: typing.Optional[str] = None,
    multiline: bool = False,
    unquote_results: bool = True,
    end_quote_char: typing.Optional[str] = None,
    convert_whitespace_escapes: bool = True,
    *,
    quoteChar: str = "",
    escChar: typing.Optional[str] = None,
    escQuote: typing.Optional[str] = None,
    unquoteResults: bool = True,
    endQuoteChar: typing.Optional[str] = None,
    convertWhitespaceEscapes: bool = True,
) -> None:
    """Build the matching regex and the unquoting regex for this token.

    Each camelCase keyword (``quoteChar``, ``escChar``, ``escQuote``,
    ``unquoteResults``, ``endQuoteChar``, ``convertWhitespaceEscapes``) is a
    synonym for the snake_case parameter of the same meaning. For the
    string-valued options a truthy camelCase value takes precedence; for
    the two boolean options the effective value is true only when both
    spellings are true.

    Surrounding whitespace is stripped from ``quote_char`` and
    ``end_quote_char``. When ``end_quote_char`` is ``None`` the opening
    delimiter is reused as the closing delimiter.

    Raises:
        ValueError: if ``quote_char`` or ``end_quote_char`` is empty after
            stripping, or if the assembled pattern does not compile.
    """
    super().__init__()

    # fold the pre-PEP8 keyword synonyms into the snake_case arguments
    esc_char = escChar or esc_char
    esc_quote = escQuote or esc_quote
    unquote_results = unquoteResults and unquote_results
    end_quote_char = endQuoteChar or end_quote_char
    convert_whitespace_escapes = (
        convertWhitespaceEscapes and convert_whitespace_escapes
    )
    quote_char = quoteChar or quote_char

    # remove white space from quote chars
    quote_char = quote_char.strip()
    if not quote_char:
        raise ValueError("quote_char cannot be the empty string")

    if end_quote_char is None:
        end_quote_char = quote_char
    else:
        end_quote_char = end_quote_char.strip()
        if not end_quote_char:
            raise ValueError("end_quote_char cannot be the empty string")

    self.quote_char: str = quote_char
    self.quote_char_len: int = len(quote_char)
    self.first_quote_char: str = quote_char[0]
    self.end_quote_char: str = end_quote_char
    self.end_quote_char_len: int = len(end_quote_char)
    self.esc_char: str = esc_char or ""
    # True whenever an escape character argument was supplied at all,
    # even an empty string
    self.has_esc_char: bool = esc_char is not None
    self.esc_quote: str = esc_quote or ""
    self.unquote_results: bool = unquote_results
    self.convert_whitespace_escapes: bool = convert_whitespace_escapes
    self.multiline = multiline
    self.re_flags = re.RegexFlag(0)

    # fmt: off
    # build up re pattern for the content between the quote delimiters;
    # the alternatives are tried in the order they are appended
    inner_pattern: list[str] = []

    if esc_quote:
        inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

    if esc_char:
        inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

    if len(self.end_quote_char) > 1:
        # accept any proper prefix of a multi-character closing delimiter,
        # provided it is not followed by the rest of that delimiter
        inner_pattern.append(
            "(?:"
            + "|".join(
                f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                for i in range(len(self.end_quote_char) - 1, 0, -1)
            )
            + ")"
        )

    if self.multiline:
        self.re_flags |= re.MULTILINE | re.DOTALL
        inner_pattern.append(
            rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
            rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
        )
    else:
        # single-line strings additionally exclude line terminators
        inner_pattern.append(
            rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
            rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
        )

    self.pattern = "".join(
        [
            re.escape(self.quote_char),
            "(?:",
            '|'.join(inner_pattern),
            ")*",
            re.escape(self.end_quote_char),
        ]
    )

    if self.unquote_results:
        if self.convert_whitespace_escapes:
            self.unquote_scan_re = re.compile(
                rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                # This piece must NOT be an f-string: inside an f-string
                # "{3}", "{2}" and "{4}" are replacement fields that render
                # as the digits 3, 2 and 4, which silently turned the
                # repetition counts into literal characters.
                r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                rf"|({re.escape(self.esc_char)}.)"
                rf"|(\n|.)",
                flags=self.re_flags,
            )
        else:
            self.unquote_scan_re = re.compile(
                rf"({re.escape(self.esc_char)}.)"
                rf"|(\n|.)",
                flags=self.re_flags
            )
    # fmt: on

    try:
        self.re = re.compile(self.pattern, self.re_flags)
        self.reString = self.pattern
        self.re_match = self.re.match
    except re.error:
        raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

    self.errmsg = f"Expected {self.name}"
    self.mayIndexError = False
    self._may_return_empty = True

3703 

def _generateDefaultName(self) -> str:
    """Return a description of this expression built from its delimiters.

    When the opening and closing delimiters are equal (and the delimiter is
    an instance of one of the ``str_type`` types) the short form naming the
    single delimiter is used; otherwise both delimiters are listed.
    """
    opener = self.quote_char
    closer = self.end_quote_char

    single_delimiter = opener == closer and isinstance(opener, str_type)
    if not single_delimiter:
        return f"quoted string, starting with {opener} ending with {closer}"

    return f"string enclosed in {opener!r}"

3711 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Match a quoted string at ``loc`` and return ``(end_loc, text)``.

    When ``unquote_results`` is false the matched text is returned with its
    delimiters. Otherwise the delimiters are removed, escape sequences are
    resolved using ``unquote_scan_re``, and occurrences of ``esc_quote``
    are replaced by the closing delimiter.

    Raises:
        ParseException: if no quoted string starts at ``loc``.
    """

    def decode_numeric(digits: str) -> str:
        # receives the escape with its leading backslash already removed
        if digits == "0":
            return "\0"
        if digits.isdigit() and len(digits) == 3:
            return chr(int(digits, base=8))
        if digits.startswith(("u", "x")):
            return chr(int(digits[1:], base=16))
        return digits

    # compare the first character of the opening quote before paying for
    # the full regex match
    matched = None
    if instring[loc] == self.first_quote_char:
        matched = self.re_match(instring, loc)
    if not matched:
        raise ParseException(instring, loc, self.errmsg, self)

    # ending location and matched text come from the regex match
    loc = matched.end()
    text = matched.group()

    if not self.unquote_results:
        return loc, text

    # strip off quotes
    text = text[self.quote_char_len : -self.end_quote_char_len]

    if isinstance(text, str_type):
        pieces = []
        if self.convert_whitespace_escapes:
            # exactly one of the four groups is set for each match
            for m in self.unquote_scan_re.finditer(text):
                ws_escape, numeric_escape, escaped_char, any_char = m.groups()
                if ws_escape:
                    # \t, \n, etc.
                    pieces.append(self.ws_map[ws_escape])
                elif numeric_escape:
                    # escaped octal, null, hex, and Unicode sequences
                    pieces.append(decode_numeric(numeric_escape[1:]))
                elif escaped_char:
                    # escape character followed by any character
                    pieces.append(escaped_char[-1])
                else:
                    pieces.append(any_char)
        else:
            for m in self.unquote_scan_re.finditer(text):
                escaped_char, any_char = m.groups()
                pieces.append(escaped_char[-1] if escaped_char else any_char)
        text = "".join(pieces)

        # replace escaped quotes
        if self.esc_quote:
            text = text.replace(self.esc_quote, self.end_quote_char)

    return loc, text

3774 

3775 

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.

    Whitespace is included in the matched run unless it is listed in the
    exclusion set (see example). Construct with a string holding every
    disallowed character, plus an optional minimum, maximum, and/or exact
    length. ``min`` defaults to 1 (a value < 1 is rejected); ``max`` and
    ``exact`` default to 0, meaning no maximum or exact length restriction.

    Example:

    .. testcode::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(
            DelimitedList(csv_value).parse_string(
                "dkls,lsdkjf,s12 34,@!#,213"
            )
        )

    prints:

    .. testoutput::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        """Store the exclusion set and the length limits.

        ``notChars`` is a keyword synonym for ``not_chars``; it is used only
        when ``not_chars`` is empty. A positive ``exact`` overrides both
        ``min`` and ``max``.

        Raises:
            ValueError: if ``min`` is less than 1.
        """
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Return ``!W:(...)`` listing the excluded characters.

        The listing is cut to 13 characters plus ``...`` when the
        range-collapsed form of the exclusion string exceeds 16 characters.
        """
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume characters from ``loc`` until an excluded one is found.

        Returns ``(end_loc, matched_text)``. Raises ``ParseException`` when
        the character at ``loc`` is excluded or fewer than ``minLen``
        characters were consumed.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # never scan beyond maxLen characters or the end of the input
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3862 

3863 

class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display label for each whitespace character; the keys are also the
    # candidates for the characters this expression skips before matching
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        """Configure which characters to match and the length limits.

        Args:
            ws: the characters that count as a match.
            min: minimum number of characters to match.
            max: maximum number of characters to match; values <= 0 mean
                no maximum.
            exact: when > 0, overrides both ``min`` and ``max``.
        """
        super().__init__()
        self.matchWhite = ws
        # skip only those known whitespace characters that are not
        # themselves being matched
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        # self.leave_whitespace()
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        """Return the concatenated labels of the characters being matched.

        Characters without an entry in ``whiteStrs`` (for example ``"\\v"``)
        are shown as ``<U+XXXX>`` instead of raising ``KeyError``.
        """
        return "".join(
            White.whiteStrs.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume a run of matching characters starting at ``loc``.

        Returns ``(end_loc, matched_text)``. Raises ``ParseException`` when
        the character at ``loc`` is not one of ``matchWhite`` or fewer than
        ``minLen`` characters were consumed.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        # never scan beyond maxLen characters or the end of the input
        maxloc = min(start + self.maxLen, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3941 

3942 

class PositionToken(Token):
    """Base class shared by tokens such as :class:`LineStart`,
    :class:`LineEnd`, :class:`StringStart` and :class:`StringEnd`.

    Initializes ``_may_return_empty`` to ``True`` and ``mayIndexError`` to
    ``False`` for every subclass instance.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

3948 

3949 

class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int) -> None:
        """Remember ``colno`` as the column to advance to."""
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Advance ``loc`` over ignorables and whitespace toward the column.

        Returns ``loc`` unchanged when it is already in the target column;
        otherwise skips ignore expressions (if any are registered) and then
        whitespace characters, stopping at the target column, at the first
        non-whitespace character, or at the end of the input.
        """
        wanted = self.col
        if col(loc, instring) == wanted:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < end and instring[loc].isspace() and col(loc, instring) != wanted:
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the text between ``loc`` and the target column.

        Raises ``ParseException`` when ``loc`` is already past the target
        column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]

3982 

3983 

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example:

    .. testcode::

        test = '''\
        AAA this line
        AAA and this line
          AAA and even this line
        B AAA but definitely not this line
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['AAA', ' this line']
        ['AAA', ' and this line']
        ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        """Set up a helper expression that skips whitespace other than newline."""
        super().__init__()
        self.leave_whitespace()
        # snapshot taken before "\n" is removed, so preParse can tell
        # whether newline was part of the whitespace set to begin with
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Skip leading whitespace, stepping over newlines when they were
        part of the original whitespace set, and return the new location.
        """
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)
        if "\n" not in self.orig_whiteChars:
            return pos

        # a one-character slice (rather than an index) stays safe at the
        # end of the input
        while instring[pos : pos + 1] == "\n":
            pos = self.skipper.preParse(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4036 

4037 

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self) -> None:
        """Remove newline from the skipped whitespace so it can be matched."""
        super().__init__()
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc``, or the end of the input.

        Returns ``(loc + 1, "\\n")`` for a newline and ``(loc + 1, [])`` when
        ``loc`` equals the input length; raises ``ParseException`` otherwise.
        """
        size = len(instring)
        if loc == size:
            return loc + 1, []
        if loc < size and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

4059 

4060 

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at location 0, or at the location that
        ``preParse`` reaches when started from 0; raise ``ParseException``
        anywhere else.
        """
        # a non-zero loc still counts as the start if everything before it
        # is just whitespace and ignorables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

4076 

4077 

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at or beyond the end of the input.

        Returns ``(loc + 1, [])`` when ``loc`` equals the input length and
        ``(loc, [])`` when it is already greater; raises ``ParseException``
        when input remains.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc > end:
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

4096 

4097 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    # NOTE: the docstring is raw so that ``\b`` is kept as backslash-b
    # instead of being stored as a backspace character.

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        """Store the set of characters that make up a word.

        ``wordChars`` is a keyword synonym for ``word_chars``; it takes
        effect only when it differs from the default ``printables``.
        """
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when a word starts at ``loc``.

        Location 0 always succeeds. Elsewhere the previous character must
        not be a word character and the current character must be one;
        otherwise ``ParseException`` is raised.
        """
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4124 

4125 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """
    # NOTE: the docstring is raw so that ``\b`` is kept as backslash-b
    # instead of being stored as a backspace character.

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        """Store the set of characters that make up a word.

        ``wordChars`` is a keyword synonym for ``word_chars``; it takes
        effect only when it differs from the default ``printables``.
        """
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when a word ends at ``loc``.

        Any location at or past the end of the input succeeds. Elsewhere the
        current character must not be a word character and the previous
        character must be one; otherwise ``ParseException`` is raised.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # At loc 0 there is no previous character, so no word can
                # end here. Without this guard instring[loc - 1] would be
                # instring[-1], i.e. the LAST character of the input.
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4153 

4154 

class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example:

    .. doctest::

       >>> end_punc = "." | ("!" + Tag("enthusiastic"))
       >>> greeting = "Hello," + Word(alphas) + end_punc

       >>> result = greeting.parse_string("Hello, World.")
       >>> print(result.dump())
       ['Hello,', 'World', '.']

       >>> result = greeting.parse_string("Hello, World!")
       >>> print(result.dump())
       ['Hello,', 'World', '!']
       - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        """Record the tag name/value and register the parse action that
        writes them into the results.
        """
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True
        self.leave_whitespace()
        self.tag_name, self.tag_value = tag_name, value
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store ``tag_value`` in ``tokens`` under ``tag_name``."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        """Return ``<ClassName>:<tag_name>=<repr(tag_value)>``."""
        cls_name = type(self).__name__
        return f"{cls_name}:{self.tag_name}={self.tag_value!r}"

4196 

4197 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepts a generator, a single string, a single ``ParserElement``, or
        any iterable; strings are wrapped with ``_literalStringClass``. A
        non-iterable object that ``list()`` rejects is stored as a
        one-element list.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]

        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any strings in the sequence with the literal class
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in list(exprs)
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]

        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions, reset the cached
        default name, and return ``self``.
        """
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        # work on copies so the caller's original sub-expressions are untouched
        self.exprs = [e.copy() for e in self.exprs]
        for sub in self.exprs:
            sub.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        # work on copies so the caller's original sub-expressions are untouched
        self.exprs = [e.copy() for e in self.exprs]
        for sub in self.exprs:
            sub.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained
        expression; a ``Suppress`` that is already registered is skipped.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for sub in self.exprs:
            sub.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        """Return ``<ClassName>:(<list of contained expressions>)``."""
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline contained expressions and flatten same-class nesting.

        Does nothing when already streamlined.
        """
        if self.streamlined:
            return self

        super().streamline()

        for sub in self.exprs:
            sub.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:

            def can_absorb(candidate) -> bool:
                return (
                    isinstance(candidate, self.__class__)
                    and not candidate.parseAction
                    and candidate.resultsName is None
                    and not candidate.debug
                )

            head = self.exprs[0]
            if can_absorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            # re-read the last element: the list may just have been rebuilt
            tail = self.exprs[-1]
            if can_absorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self._may_return_empty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emit a ``DeprecationWarning``, validate each contained
        expression, then run the recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace_so_far = [] if validateTrace is None else validateTrace
        trace = trace_so_far[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are copies as well."""
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and not suppressed) if a contained expression already has a results
        name.
        """
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for sub in self.exprs:
                if not isinstance(sub, ParserElement) or not sub.resultsName:
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in sub.suppress_warnings_
                ):
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {sub.resultsName!r} on contained expression"
                )
                # issued directly from this frame so that stacklevel keeps
                # pointing at the same caller
                warnings.warn(warning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4369 

4370 

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example:

    .. testcode::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once ``parseImpl`` passes one, failures in later
        elements are raised as ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence from ``exprs_arg``.

        Strings are converted with ``_literalStringClass`` and each
        ``Ellipsis`` is replaced by a ``SkipTo`` aimed at the element that
        follows it.

        Raises:
            Exception: if the sequence ends with ``...``.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(item) if isinstance(item, str) else item
            for item in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            exprs[:-1] = [
                SkipTo(following)("_skipped*") if current is Ellipsis else current
                for current, following in zip(exprs, exprs[1:])
            ]

        super().__init__(exprs, savelist)

        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling follows the first element, unless that
            # element is itself a White
            if isinstance(self.exprs[0], White):
                self.skipWhitespace = False
            else:
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace

        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Resolve pending skips, streamline, and link ``IndentedBlock`` s.

        After the base-class streamlining, each element that is (or leads
        with) an ``IndentedBlock`` gets a parse action attached to the
        preceding element that records the column where that element
        matched as the block's ``parent_anchor``.
        """

        def ends_with_pending_skip(expr) -> bool:
            return bool(
                isinstance(expr, ParseExpression)
                and expr.exprs
                and isinstance(expr.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if ends_with_pending_skip(e):
                    # fold the following element into the pending skip and
                    # mark that element's slot for removal
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            visited = set()
            while id(cur) not in visited:
                visited.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                first_sub = next(iter(cur.recurse()), None)
                if first_sub is None:
                    break
                cur = typing.cast(ParserElement, first_sub)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order and return the final
        location with the accumulated results.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        past_error_stop = False
        for e in self.exprs[1:]:
            # exact type test, not isinstance
            if type(e) is And._ErrorStop:
                past_error_stop = True
                continue

            if not past_error_stop:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            else:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append ``other`` (strings are converted with
        ``_literalStringClass``); return ``NotImplemented`` for anything
        that is not a ``ParserElement``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        """Run the recursion check on contained expressions, stopping after
        the first one that cannot return empty.
        """
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(trail)
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        """Return the space-joined element names wrapped in a single pair of
        braces.
        """
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return f"{{{inner}}}"

4539 

4540 

class Or(ParseExpression):
    """Alternation in which the longest match wins.

    At least one of the contained :class:`ParserElement` alternatives
    must match. When several alternatives match, the one consuming the
    longest string is used. May be constructed using the ``'^'``
    operator.

    Example:

    .. testcode::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints:

    .. testoutput::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            # with no alternatives there is nothing that could consume input
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline the alternatives, then refresh the flags derived from them."""
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative at ``loc`` and return the longest match.

        Phase one probes each alternative with ``try_parse``, recording
        the end location of every success, every fatal exception, and the
        non-fatal exception that got furthest. Phase two re-parses the
        successes from longest to shortest, since parse actions and
        conditions can change whether, or how far, an alternative matches.

        Raises the "best" :class:`ParseFatalException` if any was seen,
        otherwise the furthest :class:`ParseException`.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates: list[tuple[int, ParserElement]] = []
        fatal_errors: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                fatal_errors.append(pfe)
                # a fatal error supersedes any ordinary failure seen so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatal_errors:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # remember every match, to retry them longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # re-evaluate in descending order of match length, in case attached
            # actions might change whether or how much they match of the input
            candidates.sort(key=lambda pair: pair[0], reverse=True)

            if not do_actions:
                # no conditions or parse actions will run, so nothing can change
                # the selection: the first (longest) candidate is the best match
                return candidates[0][1]._parse(instring, loc, do_actions)

            best: tuple[int, typing.Optional[ParseResults]] = -1, None
            for probed_end, alt in candidates:
                if probed_end <= best[0]:
                    # already hold a match at least as long as this one can give
                    return best

                try:
                    actual_end, toks = alt._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_end >= probed_end:
                        return actual_end, toks
                    # matched less than the probe did; keep it only if it is
                    # the longest re-parse seen so far
                    if actual_end > best[0]:
                        best = actual_end, toks

            if best != (-1, None):
                return best

        if fatal_errors:
            if len(fatal_errors) > 1:
                fatal_errors.sort(key=lambda e: -e.loc)
                if fatal_errors[0].loc == fatal_errors[1].loc:
                    # tie on location: prefer the longest-named parser element
                    fatal_errors.sort(
                        key=lambda e: (-e.loc, -len(str(e.parser_element)))
                    )
            raise fatal_errors[0]

        if furthest_exc is not None:
            # if the furthest failure is at the (whitespace-skipped) start, every
            # alternative failed right there, so report this expression's own
            # collective message instead of one alternative's message
            if furthest_loc == self.preParse(instring, loc):
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Support ``expr ^= other`` by appending ``other`` as a new alternative."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        alternatives = " ^ ".join(str(e) for e in self.exprs)
        return "{" + alternatives + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, optionally warning when an alternative is an ``And``."""
        warning_flag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and warning_flag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and warning_flag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4701 

4702 

class MatchFirst(ParseExpression):
    """Alternation in which the first listed match wins.

    At least one of the contained :class:`ParserElement` alternatives
    must match. If more than one would match, the first one listed is
    the one that is used. May be constructed using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

        # watch the order of expressions to match
        >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        >>> print(number.search_string("123 3.1416 789"))  # Fail!
        [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        >>> print(number.search_string("123 3.1416 789"))  # Better
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then refresh the flags derived from the alternatives."""
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses at ``loc``.

        A :class:`ParseFatalException` from any alternative is re-raised
        immediately. If every alternative fails, the exception that
        reached the furthest location is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is not None:
            # if the furthest failure is at the (whitespace-skipped) start, every
            # alternative failed right there, so report this expression's own
            # collective message instead of any individual error message
            if furthest_loc == self.preParse(instring, loc):
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Support ``expr |= other`` by appending ``other`` as a new alternative."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        alternatives = " | ".join(str(e) for e in self.exprs)
        return "{" + alternatives + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, optionally warning when an alternative is an ``And``."""
        warning_flag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and warning_flag not in self.suppress_warnings_
            and any(
                isinstance(e, And) and warning_flag not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4813 

4814 

class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but
    accepts them in any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example:

    .. testcode::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: 'BLACK'
        - posn: ['100', ',', '120']
          - x: '100'
          - y: '120'
        - shape: 'SQUARE'
        ...

        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
         'posn:', ['50', ',', '80']]
        - color: 'BLUE'
        - posn: ['50', ',', '80']
          - x: '50'
          - y: '80'
        - shape: 'CIRCLE'
        - size: '50'
        ...

        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
         'posn:', ['20', ',', '40']]
        - color: 'GREEN'
        - posn: ['20', ',', '40']
          - x: '20'
          - y: '40'
        - shape: 'TRIANGLE'
        - size: '20'
        ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        # all() of an empty sequence is True, which covers the no-expressions case
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built lazily on first parse
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Support ``expr &= other`` by appending ``other`` to this Each."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions and refresh ``_may_return_empty``."""
        super().streamline()
        # all() of an empty sequence is True, which covers the no-expressions case
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expressions in whatever order the input supplies them.

        The expressions are first probed repeatedly with ``try_parse`` to
        discover a workable order; the expressions are then parsed for
        real in that order, starting again from ``loc``, and their results
        are concatenated.

        Raises a :class:`ParseFatalException` seen in the final probing
        round, or a :class:`ParseException` naming the required
        expressions that were never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            unwrapped_opts = [e.expr for e in self.exprs if isinstance(e, Opt)]
            empty_capable = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = unwrapped_opts + empty_capable
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        match_order: list[ParserElement] = []

        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while True:
            round_exprs = pending_required + pending_optional + repeatables
            failed.clear()
            fatals.clear()
            for e in round_exprs:
                try:
                    probe_loc = e.try_parse(instring, probe_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the Opt wrapper when e is the inner expr of an Opt
                    match_order.append(self.opt1map.get(id(e), e))
                    if e in pending_required:
                        pending_required.remove(e)
                    elif e in pending_optional:
                        pending_optional.remove(e)
            if len(failed) == len(round_exprs):
                # a full round with no progress: nothing more can match
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longest-named parser element
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order.extend(
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        )

        total_results = ParseResults([])
        for e in match_order:
            loc, results = e._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        members = " & ".join(str(e) for e in self.exprs)
        return "{" + members + "}"

4998 

4999 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement` that wraps a single
    expression, for combining and post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to a parser element
            expr_str = typing.cast(str, expr)
            literal_class = self._literalStringClass
            if issubclass(literal_class, Token):
                expr = literal_class(expr_str)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_class):
                expr = Literal(expr_str)
            else:
                expr = literal_class(Literal(expr_str))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the wrapped expression's parsing attributes
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if there is none."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the wrapped expression.

        Exceptions other than :class:`ParseSyntaxException` have their
        missing ``pstr``/``loc``/``parser_element`` filled in before being
        re-raised, and take this element's ``errmsg`` when this element
        has a custom name and is not a ``Forward``.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            has_custom_name = self.customName is not None
            if not isinstance(self, Forward) and has_custom_name and self.errmsg:
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy so the setting does not leak to other users of expr
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy so the setting does not leak to other users of expr
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the wrapped expression.

        A ``Suppress`` that is already in ``ignoreExprs`` is not added again.
        """
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        seen_so_far = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(seen_so_far)

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and runs the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = ([] if validateTrace is None else validateTrace)[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5108 

5109 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Zero-width match that succeeds only at column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, locn, toks: col(locn, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width match that succeeds only at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, locn, toks: col(locn, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column used for the optional trailing-undent match; nested blocks
        # created in parseImpl get the column of their enclosing block
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse consecutive matches of ``self.expr`` that share one indent column.

        The indent column is that of the first non-whitespace position at
        or after ``loc``. A one-off grammar is built for that column and
        used to parse from there.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        line_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper_level + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )

5172 

5173 

class AtStringStart(ParseElementEnhance):
    """Matches the given expression only at location 0 of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string(" 123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # no pre-parse step, so parseImpl sees the location exactly as passed in
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

5193 

5194 

class AtLineStart(ParseElementEnhance):
    r"""Matches the given expression only when it begins in column 1
    of a line within the parse string

    Example:

    .. testcode::

        test = '''\
        BBB this line
        BBB and this line
          BBB but not this one
        A BBB and definitely not this one
        '''

        for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['BBB', ' this line']
        ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # no pre-parse step, so parseImpl sees the location exactly as passed in
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

5229 

5230 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position. The returned token
    list is always empty. If any results names are defined in the
    lookahead expression, those *will* be returned for access by
    name.

    Example:

    .. testcode::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(
            label + Suppress(':')
            + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
        )

        attr_expr[1, ...].parse_string(
            "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # parse with the wrapped expression, then empty the token list in place;
        # this keeps any named results that were defined in the lookahead
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead[:]

        # report the original location: a lookahead consumes no input
        return loc, lookahead

5273 

5274 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position. ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False
        # ``exact`` means the lookbehind distance is taken from the expression
        # itself, so parseImpl makes a single attempt at ``loc - retreat``
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # empty the token list in place, so only named results survive
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Verify that the lookbehind expression matches just before ``loc``.

        Returns ``loc`` unchanged together with the lookbehind results.
        Raises :class:`ParseException` if there is not enough preceding
        input, or if no lookbehind position produces a match that ends
        exactly at ``loc``.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        instring_slice = instring[max(0, loc - self.retreat) : loc]
        last_expr: ParseBaseException = ParseException(instring, loc, self.errmsg, self)

        # Try start positions from the end of the window back to its first
        # character. The bound is the window length, so the computed start
        # position is never negative (a negative start would index from the
        # end of the slice instead of looking further back).
        for offset in range(1, len(instring_slice) + 1):
            try:
                _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
            except ParseBaseException as pbe:
                last_expr = pbe
            else:
                break
        else:
            # no start position inside the window produced a match
            raise last_expr

        return loc, ret

5355 

5356 

class Located(ParseElementEnhance):
    """
    Wraps the tokens of a match together with the locations in the
    input string where the match starts and ends.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end, tokens = self.expr._parse(instring, begin, do_actions, callPreParse=False)
        located = ParseResults([begin, tokens, end])
        located["locn_start"] = begin
        located["value"] = tokens
        located["locn_end"] = end
        if self.resultsName:
            # must return as a list, so that the name will be attached to the complete group
            return end, [located]
        return end, located

5400 

5401 

class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position. Also, ``NotAny`` does
    *not* skip over leading whitespace. The returned token list is
    always empty. May be constructed using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        unwanted_found = self.expr.can_parse_next(instring, loc, do_actions=do_actions)
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"

5445 

5446 

class _MultipleMatch(ParseElementEnhance):
    """Repetition of a wrapped expression, with an optional terminating
    sentinel (``stop_on``) that ends the repetition when it matches.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        self.saveAsList = True
        # the camelCase keyword takes precedence when both spellings are given;
        # the stopOn() method converts a string sentinel to a parser element
        self.stopOn(stopOn or stop_on)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the terminating sentinel; returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the wrapped expression once, then as many more times as possible.

        The first match is mandatory, so its exception propagates. Later
        failures (``ParseException`` or ``IndexError``) simply end the
        repetition.
        """
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_ender = None
        if self.not_ender is not None:
            reject_ender = self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # no further repetition matched; keep what has been collected
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, optionally warning about a named contained expression."""
        warning_flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and warning_flag not in self.suppress_warnings_
        ):
            for e in [self.expr] + self.expr.recurse():
                if (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and warning_flag not in e.suppress_warnings_
                ):
                    warning = (
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {e.resultsName!r} on contained expression"
                    )
                    warnings.warn(warning, stacklevel=3)
                    # one warning is enough
                    break

        return super()._setResultsName(name, list_all_matches)

5522 

5523 

class OneOrMore(_MultipleMatch):
    """
    Match the given expression one or more times in succession.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word).set_parse_action(' '.join))

        >>> text = "shape: SQUARE posn: upper left color: BLACK"

        # Fail! read 'posn' as data instead of next label
        >>> attr_expr[1, ...].parse_string(text).pprint()
        [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore
        # to avoid reading label string as part of the data
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(
        ...         data_word, stop_on=label).set_parse_action(' '.join))
        >>> OneOrMore(attr_expr).parse_string(text).pprint() # Better
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        >>> (attr_expr * (1,)).parse_string(text).pprint()
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        # default name: the repeated expression in braces, followed by "..."
        return "{" + str(self.expr) + "}..."

5567 

5568 

class ZeroOrMore(_MultipleMatch):
    """
    Match the given expression any number of times, including not at all.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        # zero repetitions is a valid match
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the one-or-more logic; a failure becomes an empty match at ``loc``."""
        try:
            outcome = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        # default name: the repeated expression in brackets, followed by "..."
        return "[" + str(self.expr) + "]..."

5601 

5602 

class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    ``min`` and ``max`` bound the number of list elements: the list is
    built as one element followed by between ``min - 1`` and ``max - 1``
    delimiter/element pairs. ``min`` defaults to 1 when omitted, and an
    omitted ``max`` leaves the list unbounded.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Raises ``ValueError`` if ``min`` is less than 1, or if ``max`` is less
    than the (explicit or default) minimum.

    Example:

    .. doctest::

        >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
        ParseResults(['aa', 'bb', 'cc'], {})
        >>> DelimitedList(Word(hexnums), delim=':', combine=True
        ... ).parse_string("AA:BB:CC:DD:EE")
        ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        # An omitted min means "at least one element", so max has to be
        # checked against that effective minimum as well; otherwise a max
        # below 1 would produce repetition bounds with upper < lower.
        effective_min = min or 1
        if max is not None and max < effective_min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        # keep the delimiter's string form for use in the default name
        self.raw_delim = str(delim)
        self.delim = delim
        self.combine = combine
        if not combine:
            # delimiters are only kept in the output when combining
            self.delim = Suppress(delim)
        self.min = effective_min
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # first element, then (min-1 .. max-1) delimiter+element repetitions
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Return a default name of the form ``content [delim content]...``."""
        content_expr = self.content.streamline()
        return f"{content_expr} [{self.raw_delim} {content_expr}]..."

5674 

5675 

5676class _NullToken: 

5677 def __bool__(self): 

5678 return False 

5679 

5680 def __str__(self): 

5681 return "" 

5682 

5683 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    :param expr: expression that may match once, or not at all
    :param default: (optional) - value to be returned
        if the optional expression is not found.

    Example:

    .. testcode::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
        98765-
             ^
        ParseException: Expected end of text, found '-' (at char 5), (line:1, col:6)
        FAIL: Expected end of text, found '-' (at char 5), (line:1, col:6)
    """

    # sentinel marking "no default supplied"; compared by identity in parseImpl
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.defaultValue = default
        # not matching at all is a valid outcome
        self._may_return_empty = True
        self.saveAsList = self.expr.saveAsList

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, falling back to the default on failure.

        On failure ``loc`` is left unchanged and the tokens are either empty
        (no default configured) or hold the default value, which is also
        stored under the wrapped expression's results name if it has one.
        """
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(
                instring, loc, do_actions, callPreParse=False
            )
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif wrapped.resultsName:
                tokens = ParseResults([fallback])
                tokens[wrapped.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Return the wrapped expression's name in brackets, minus any enclosing braces."""
        label = str(self.expr)
        # strip off redundant inner {}'s
        while len(label) > 1 and label.startswith("{") and label.endswith("}"):
            label = label[1:-1]
        return "[" + label + "]"

5765 

5766 

# Alternate name bound to the same class as Opt.
# NOTE: within this module the name shadows typing.Optional; the annotations
# visible in this file spell that type as ``typing.Optional``.
Optional = Opt

5768 

5769 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param expr: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
        (the skipped text and target expression are returned
        as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
        (typically quoted strings and comments)
        that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
        are not allowed to be included in the skipped text;
        if found before the target expression is found,
        the :class:`SkipTo` is not a match

    Example:

    .. testcode::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints:

    .. testoutput::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        # the camelCase keyword, when truthy, takes precedence over fail_on
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper expression that only carries the ignore expressions, used by
        # parseImpl to step over ignorable text while scanning
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target's ignore expressions and ``ignoreExpr``."""
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register an ignore expression and refresh the internal ignorer.

        Returns ``self`` so that calls can be chained, like the other
        ``ignore`` overrides in this module.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Scan forward from ``loc`` until the target expression matches.

        Returns the new location and a ``ParseResults`` holding the skipped
        text (plus the target's tokens when ``include`` was requested).

        Raises:
            ParseException: if the end of ``instring`` is reached without the
                target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_fail_on_can_parse_next = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_fail_on_can_parse_next is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the loop's else clause, so
                # no exception is raised here; the text up to this position is
                # returned below (and with include=True the target is parsed
                # at this position, which is where a failure would surface).
                if self_fail_on_can_parse_next(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: actions are not run while searching for the target
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real, honoring the caller's do_actions
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5926 

5927 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    instance using the ``'<<'`` operator.

    .. Note::

       Take care when assigning to ``Forward`` not to overlook
       precedence of operators.

       Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

           fwd_expr << a | b | c

       will actually be evaluated as::

           (fwd_expr << a) | b | c

       thereby leaving b and c out as parseable alternatives.
       It is recommended that you explicitly group the values
       inserted into the :class:`Forward`::

           fwd_expr << (a | b | c)

       Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :meth:`ParseResults.pprint` for an example of a recursive
    parser created using :class:`Forward`.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember where this Forward was created; __del__ uses it to point
        # its "never assigned" warning at the defining line
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' assignment, compared in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and return ``self``.

        Strings are converted to literal expressions. Anything that is not
        a ``ParserElement`` after that yields ``NotImplemented`` and leaves
        this ``Forward`` untouched.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Drop the creation frame only once the assignment is known to be
        # valid: __del__ still needs it while self.expr is None.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        # mirror the parsing-related settings of the contained expression
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """In-place form of ``<<``; defers to ``__lshift__`` for ``ParserElement`` operands."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """Build an alternation, warning when ``|`` is used on the same line as ``<<``."""
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse via the contained expression, with optional left-recursion support.

        Warns (when the diagnostic is enabled) if no expression was ever
        attached. When left recursion is not enabled this simply delegates
        to the base implementation; otherwise the bounded recursion
        algorithm described below is used.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Disable whitespace skipping on this element only; ``recursive`` is not used here."""
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Enable whitespace skipping on this element only; ``recursive`` is not used here."""
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the contained expression once; the flag is set first, before descending."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then runs the legacy checks."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<ClassName>: <expr>"``, with the expression text capped at 1000 characters."""
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """Copy this element; an unassigned ``Forward`` yields a new ``Forward`` wrapping this one."""
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Assign a results name, warning (if enabled) when no expression is attached yet."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

6198 

6199 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but not forwarded to the base initializer;
        # saveAsList is always reset to False here.
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

6208 

6209 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example:

    .. doctest::

        >>> real = Word(nums) + '.' + Word(nums)
        >>> print(real.parse_string('3.1416'))
        ['3', '.', '1416']

        >>> # will also erroneously match the following
        >>> print(real.parse_string('3. 1416'))
        ['3', '.', '1416']

        >>> real = Combine(Word(nums) + '.' + Word(nums))
        >>> print(real.parse_string('3.1416'))
        ['3.1416']

        >>> # no match when there are internal spaces
        >>> print(real.parse_string('3. 1416'))
        Traceback (most recent call last):
        ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # the camelCase keyword wins whenever it is given explicitly
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register an ignore expression; adjacent combines use ``ParserElement.ignore`` directly."""
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single joined string token."""
        # start from a copy of the results, emptied of its tokens
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        return [combined] if self.resultsName and combined.haskeys() else combined

6274 

6275 

class Group(TokenConverter):
    """Converter that nests the matched tokens in a list of their own - handy
    for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    With ``aslist=True`` the grouped tokens are returned as a Python list
    rather than a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> ident = Word(alphas)
        >>> num = Word(nums)
        >>> term = ident | num
        >>> func = ident + Opt(DelimitedList(term))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', 'a', 'b', '100']

        >>> func = ident + Group(Opt(DelimitedList(term)))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the tokens in one more list level (or a plain-list wrapper if requested)."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.asList()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)

6313 

6314 

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and, in
    addition, as a dictionary: each element can be looked up using the first
    token of that element as its key. Useful for tabular report scraping when
    the first column can serve as an item key.

    With ``asdict=True`` the parsed tokens are returned as a Python dict
    rather than a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')

        >>> attr_expr = (
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join)
        ... )

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> # print attributes as plain groups
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
        # Dict will auto-assign names.
        >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        # access named fields as dict entries, or output as dict
        >>> print(result['shape'])
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Name each non-empty element of ``tokenlist`` after its first token.

        The value stored under that key is an empty string for one-token
        elements, the second token for simple two-token elements, and
        otherwise the element's remaining tokens (unwrapped when exactly one
        unnamed token remains).
        """
        for index, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue

            key = tok[0]
            if isinstance(key, int):
                key = str(key).strip()

            if len(tok) == 1:
                # key only, no value
                tokenlist[key] = _ParseResultsWithOffset("", index)
                continue

            if len(tok) == 2 and not isinstance(tok[1], ParseResults):
                # simple key/value pair
                tokenlist[key] = _ParseResultsWithOffset(tok[1], index)
                continue

            try:
                remainder = tok.copy()  # ParseResults(i)
            except Exception:
                exc = TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                )
                raise exc from None

            # everything after the key token makes up the value
            del remainder[0]

            if len(remainder) != 1 or (
                isinstance(remainder, ParseResults) and remainder.haskeys()
            ):
                value = remainder
            else:
                value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, index)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

6413 

6414 

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example:

    .. doctest::

        >>> source = "a, b, c,d"
        >>> wd = Word(alphas)
        >>> wd_list1 = wd + (',' + wd)[...]
        >>> print(wd_list1.parse_string(source))
        ['a', ',', 'b', ',', 'c', ',', 'd']

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        >>> wd_list2 = wd + (Suppress(',') + wd)[...]
        >>> print(wd_list2.parse_string(source))
        ['a', 'b', 'c', 'd']

        # Skipped text (using '...') can be suppressed as well
        >>> source = "lead in START relevant text END trailing text"
        >>> start_marker = Keyword("START")
        >>> end_marker = Keyword("END")
        >>> find_body = Suppress(...) + start_marker + ... + end_marker
        >>> print(find_body.parse_string(source))
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` stands for "skip ahead", resolved once the
        # following expression is known (see __add__ / __sub__)
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> ParserElement:
        """Concatenate; a pending skip becomes a suppressed ``SkipTo(other)`` followed by ``other``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        """Like ``__add__`` but using the ``-`` operator for the combination."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressing; return ``self`` unchanged."""
        return self

6467 

6468 

6469# XXX: Example needs to be re-done for updated output 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator that traces calls to a parse action on ``sys.stderr``.

    On entry the wrapper writes
    ``">>entering <name>(line: <current_source_line>, <parse_location>, <matched_tokens>)"``.
    On exit it writes ``"<<leaving <name>"`` followed by either the returned
    value or the type and message of the exception raised (the exception is
    then re-raised).

    Example:

    .. testsetup:: stderr

       import sys
       sys.stderr = sys.stdout

    .. testcleanup:: stderr

       sys.stderr = sys.__stderr__

    .. testcode:: stderr

       wd = Word(alphas)

       @trace_parse_action
       def remove_duplicate_chars(tokens):
           return ''.join(sorted(set(''.join(tokens))))

       wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
       print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
       :options: +NORMALIZE_WHITESPACE

       >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
       0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
       <<leaving remove_duplicate_chars (ret: 'dfjkls')
       ['dfjkls']

    .. versionchanged:: 3.1.0
       Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        thisFunc = f.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is present: prefix the name with its type
            thisFunc = f"{type(paArgs[0]).__name__}.{thisFunc}"

        entry_msg = f">>entering {thisFunc}(line: {line(l, s)!r}, {l}, {t!r})\n"
        sys.stderr.write(entry_msg)

        try:
            ret = f(*paArgs)
        except Exception as exc:
            failure_msg = (
                f"<<leaving {thisFunc} (exception: {type(exc).__name__}: {exc})\n"
            )
            sys.stderr.write(failure_msg)
            raise
        else:
            exit_msg = f"<<leaving {thisFunc} (ret: {ret!r})\n"
            sys.stderr.write(exit_msg)
            return ret

    z.__name__ = f.__name__
    return z

6533 

6534 

# convenience constants for positional expressions
# Each is a module-level instance of the corresponding expression class,
# given an explicit name via set_name().
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

6541 

# Grammar used by srange() to parse a regex-style "[...]" character set.

# Backslash-escaped punctuation: yields the character after the backslash.
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# Hex escape (\x41, \X41, or the legacy \0x41 form): everything after the
# x/X marker is the hex payload.  The payload is taken by partitioning on the
# marker rather than with lstrip(r"\0x"), because lstrip treats its argument
# as a *set* of characters: it never removed an uppercase "X" (int("X41", 16)
# raised ValueError) and it also ate leading zero digits of the payload
# ("\x00" was reduced to "", again a ValueError).
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# Octal escape (\041): drop the backslash and read the rest as base 8.
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# One set member: any escape form, else a single char other than "\" or "]".
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as [low, high] with the dash suppressed.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# Whole bracket expression: "[", optional "^" (named "negate"), one or more
# ranges / single chars (named "body"), then "]".
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6561 

6562 

def srange(s: str) -> str:
    r"""Expand a regex-style ``'[]'`` character-set string into the string of
    characters it denotes, for use in :class:`Word` construction::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be parsed or expanded, an empty string is returned.
    """

    def _expanded(p):
        # plain characters pass through; a grouped pair is an inclusive range
        if not isinstance(p, ParseResults):
            yield p
            return
        first, last = ord(p[0]), ord(p[1])
        for code_point in range(first, last + 1):
            yield chr(code_point)

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(c for part in body for c in _expanded(part))
    except Exception:
        # best effort: any parse or expansion failure yields an empty set
        return ""

6602 

6603 

def token_map(func, *args) -> ParseAction:
    """Build a parse action that applies ``func`` to every token in a
    :class:`ParseResults` list. Any extra ``args`` are passed to ``func``
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned action's ``__name__`` is ``func.__name__`` when available,
    otherwise the name of ``func``'s class.

    Example (compare the last to the example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # label the action after the mapped callable so it is identifiable by name
    fallback_name = func.__class__.__name__
    try:
        pa.__name__ = func.__name__
    except AttributeError:
        pa.__name__ = fallback_name

    return pa

6648 

6649 

def autoname_elements() -> None:
    """
    Name every not-yet-custom-named :class:`ParserElement` found among the
    caller's local variables after the variable it is bound to. Useful for
    mass-naming parser elements when generating railroad diagrams with
    named subdiagrams.
    """

    # sys._getframe may not exist on every Python implementation; fall back
    # to a stub that reports "no frame" so we can bail out quietly
    frame_lookup = getattr(sys, "_getframe", lambda _: None)
    caller = frame_lookup(1)
    if caller is None:
        return

    caller_locals = typing.cast(types.FrameType, caller).f_locals
    for var_name, candidate in caller_locals.items():
        if not isinstance(candidate, ParserElement):
            continue
        if candidate.customName:
            # leave explicitly named elements alone
            continue
        candidate.set_name(var_name)

6668 

6669 

# Single-line quoted strings.  Each regex matches the opening quote followed
# by any run of: a character other than the quote, newline, CR or backslash;
# a doubled quote; or a backslash escape (backslash + any non-"x" character,
# or backslash + "x" + hex digits).  The closing quote is matched separately
# and Combine joins the pieces into one token.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either quote style; uses the same two patterns as above, each alternative
# carrying its own name.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

6686 

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Python-style string literals: triple-quoted (""" or ''') and single-line
# forms, combined with the ``^`` operator.  Unlike quoted_string above, the
# single-line patterns accept a backslash-escaped quote (\" or \') rather
# than a doubled quote.
# NOTE(review): the triple-quoted patterns pass re.MULTILINE, which only
# affects ^ and $; without re.DOTALL the ``\\.`` alternative will not match
# a backslash followed by a newline - confirm whether that is intended.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

6703 

# a "u" prefix followed by a copy of quoted_string, combined into one token
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# Character strings built with srange() from hex code-point ranges:
# alphas8bit covers 0xc0-0xff except 0xd7 and 0xf7; punc8bit covers
# 0xa1-0xbf plus those two skipped code points.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6709 

# build list of built-in expressions, for future reference if a global default value
# gets updated
# vars() is evaluated at this point in the module, so only ParserElement
# instances bound to module-level names above this line are captured.
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6715 

# Compatibility synonyms
# camelCase names kept alongside the snake_case ones.  The expression
# synonyms are plain aliases of the same objects; the function synonyms go
# through replaced_by_pep8 (imported from .util) - presumably a shim that
# forwards to the new name; confirm its exact behavior there.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on

6730# fmt: on