Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2569 statements  

1# 

2# core.py 

3# 

4 

5from collections import deque 

6import os 

7import typing 

8from typing import ( 

9 Any, 

10 Callable, 

11 Generator, 

12 List, 

13 NamedTuple, 

14 Sequence, 

15 Set, 

16 TextIO, 

17 Tuple, 

18 Union, 

19 cast, 

20) 

21from abc import ABC, abstractmethod 

22from enum import Enum 

23import string 

24import copy 

25import warnings 

26import re 

27import sys 

28from collections.abc import Iterable 

29import traceback 

30import types 

31from operator import itemgetter 

32from functools import wraps 

33from threading import RLock 

34from pathlib import Path 

35 

36from .util import ( 

37 _FifoCache, 

38 _UnboundedCache, 

39 __config_flags, 

40 _collapse_string_to_ranges, 

41 _escape_regex_range_chars, 

42 _bslash, 

43 _flatten, 

44 LRUMemo as _LRUMemo, 

45 UnboundedMemo as _UnboundedMemo, 

46 replaced_by_pep8, 

47) 

48from .exceptions import * 

49from .actions import * 

50from .results import ParseResults, _ParseResultsWithOffset 

51from .unicode import pyparsing_unicode 

52 

53_MAX_INT = sys.maxsize 

54str_type: Tuple[type, ...] = (str, bytes) 

55 

56# 

57# Copyright (c) 2003-2022 Paul T. McGuire 

58# 

59# Permission is hereby granted, free of charge, to any person obtaining 

60# a copy of this software and associated documentation files (the 

61# "Software"), to deal in the Software without restriction, including 

62# without limitation the rights to use, copy, modify, merge, publish, 

63# distribute, sublicense, and/or sell copies of the Software, and to 

64# permit persons to whom the Software is furnished to do so, subject to 

65# the following conditions: 

66# 

67# The above copyright notice and this permission notice shall be 

68# included in all copies or substantial portions of the Software. 

69# 

70# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

71# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

72# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

73# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

74# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

75# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

76# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

77# 

78 

79 

if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """Minimal stand-in for ``functools.cached_property`` on Python < 3.8.

        A non-data descriptor: the first access through an instance calls the
        wrapped function and stores the result in the instance ``__dict__``
        under the function's name, so later lookups bypass the descriptor.
        """

        def __init__(self, func):
            self._func = func

        def __get__(self, instance, owner=None):
            # Class-level access (``Owner.attr``) has no instance to cache on;
            # return the descriptor itself, as functools.cached_property does,
            # instead of failing on ``None.__dict__``.
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret

91 

92 

class __compat__(__config_flags):
    """
    Cross-version compatibility switches.

    Flags here let behaviour planned for a future pyparsing release be
    switched on in earlier versions, for compatibility development and
    testing.

    - ``collect_all_And_tokens`` - enables the fix for Issue #63 (erroneous
      grouping of results names when an :class:`And` expression is nested
      within an :class:`Or` or :class:`MatchFirst`); kept for compatibility,
      but setting it to ``False`` no longer restores pre-2.3.1 behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every class-body name defined so far that has no leading underscore
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]

114 

115 

class __diag__(__config_flags):
    # Global diagnostic switches; every flag starts out disabled.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # every class-body name defined so far that has no leading underscore
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # partition the flag names by prefix
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        # switch on each ``warn*`` flag in turn
        for flag in cls._warning_names:
            cls.enable(flag)

136 

137 

class Diagnostics(Enum):
    """
    Names of the global diagnostic flags (all disabled by default).

    - ``warn_multiple_tokens_in_named_alternation`` - warn when a results name
      is defined on a :class:`MatchFirst` or :class:`Or` expression with one
      or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - warn when a results name
      is defined on a containing expression with ungrouped subexpressions
      that also have results names
    - ``warn_name_set_on_empty_Forward`` - warn when a :class:`Forward` is
      defined with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - warn when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - warn when a :class:`Forward` is
      defined but is overwritten by assigning using ``'='`` instead of
      ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - warn when :class:`one_of`
      is incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - (not described in the
      original documentation)
    - ``enable_debug_on_named_expressions`` - auto-enable debug on all
      subsequent calls to :class:`ParserElement.set_name`

    Use :class:`enable_diag` and :class:`disable_diag` to switch individual
    diagnostics, or :class:`enable_all_warnings` to switch on every warning.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

170 

171 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch on the global diagnostic flag named by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

177 

178 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch off the global diagnostic flag named by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

184 

185 

def enable_all_warnings() -> None:
    """
    Switch on every global warning diagnostic (see :class:`Diagnostics`);
    delegates to ``__diag__.enable_all_warnings``.
    """
    return __diag__.enable_all_warnings()

191 

192 

# the flags base class has served its purpose above; remove the name
# from this module's namespace
del __config_flags

195 

196 

197def _should_enable_warnings( 

198 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] 

199) -> bool: 

200 enable = bool(warn_env_var) 

201 for warn_opt in cmd_line_warn_options: 

202 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( 

203 ":" 

204 )[:5] 

205 if not w_action.lower().startswith("i") and ( 

206 not (w_message or w_category or w_module) or w_module == "pyparsing" 

207 ): 

208 enable = True 

209 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): 

210 enable = False 

211 return enable 

212 

213 

# At import time, switch on all warnings if the interpreter's -W options or
# the PYPARSINGENABLEALLWARNINGS environment variable ask for it.
if _should_enable_warnings(
    sys.warnoptions, os.getenv("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

218 

219 

220# build list of single arg builtins, that can be used as parse actions 

221_single_arg_builtins = { 

222 sum, 

223 len, 

224 sorted, 

225 reversed, 

226 list, 

227 tuple, 

228 set, 

229 any, 

230 all, 

231 min, 

232 max, 

233} 

234 

_generatorType = types.GeneratorType

# (new location, result) pair returned by parseImpl
ParseImplReturnType = Tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# A parse action accepts the trailing 0-3 of (instring, loc, tokens).
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# Same call shapes as ParseAction, but the callable returns a bool.
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]

# fail action: (instring, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug callbacks; the trailing bool is the cache_hit flag
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[[str, int, int, "ParserElement", ParseResults, bool], None]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

256 

257 

# character-set constants for building Word-style expressions
alphas = string.ascii_uppercase + string.ascii_lowercase
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# every printable character that is not whitespace
printables = "".join(ch for ch in string.printable if ch not in string.whitespace)

# stack entry recorded on the first _trim_arity call and reused afterwards
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]

267 

268 

def _trim_arity(func, max_limit=3):
    """Wrap ``func`` so it can be called as ``fn(s, loc, toks)`` whatever its arity.

    The wrapper first calls ``func`` with all arguments; each time that call
    itself raises TypeError it drops one more leading argument (up to
    ``max_limit``) and retries.  Once a call has succeeded the arity is
    fixed and later TypeErrors propagate unchanged.  Builtins listed in
    ``_single_arg_builtins`` are simply called with the tokens.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # Precompute the (filename, lineno) that traceback would report for the
    # call to 'func' inside wrapper, so nothing has to be extracted from the
    # stack at parse time.

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while True:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # after one successful call, a TypeError is the user's own
                if found_arity:
                    raise
                # innermost of the first two traceback entries: was the error
                # raised by the call line above, or deeper inside func?
                innermost = traceback.extract_tb(te.__traceback__, limit=2)[-1]
                raised_by_our_call = innermost[:2] == pa_call_line_synth
                if raised_by_our_call and limit < max_limit:
                    limit += 1
                    continue
                raise
    # fmt: on

    # give the wrapper func's name and doc for sensible debug output
    # (functools.wraps is avoided because it messes with the function signature)
    fallback_name = getattr(func, "__class__").__name__
    wrapper.__name__ = getattr(func, "__name__", fallback_name)
    wrapper.__doc__ = func.__doc__

    return wrapper

324 

325 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Turn a predicate returning ``True``/``False`` into a parse action that
    raises when the predicate is falsy.  Useful wherever a parse action is
    required and :class:`ParserElement.add_condition` cannot be used (such as
    when adding a condition to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - custom message for the raised exception
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing
      immediately; otherwise raise :class:`ParseException`

    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message
    exc_type = ParseFatalException if fatal else ParseException
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return
        raise exc_type(s, l, msg)

    return pa

352 

353 

def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """Print the expression about to be tried, its location, the source line
    and a caret line; the output is prefixed with ``*`` on a cache hit."""
    cache_hit_str = "*" if cache_hit else ""
    header = f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})"
    source_line = f" {line(loc, instring)}"
    caret_line = f" {' ' * (col(loc, instring) - 1)}^"
    print("\n".join((header, source_line, caret_line)))

365 

366 

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """Print the matched expression and its tokens as a list; the output is
    prefixed with ``*`` on a cache hit."""
    print(f"{'*' if cache_hit else ''}Matched {expr} -> {toks.as_list()}")

377 

378 

379def _default_exception_debug_action( 

380 instring: str, 

381 loc: int, 

382 expr: "ParserElement", 

383 exc: Exception, 

384 cache_hit: bool = False, 

385): 

386 cache_hit_str = "*" if cache_hit else "" 

387 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}") 

388 

389 

def null_debug_action(*args):
    """No-op debug action: accepts any arguments and does nothing, so that
    debugging output is suppressed during parsing."""
    return None

392 

393 

394class ParserElement(ABC): 

395 """Abstract base level parser element class.""" 

396 

397 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

398 verbose_stacktrace: bool = False 

399 _literalStringClass: type = None # type: ignore[assignment] 

400 

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of characters skipped as whitespace.

    Also updates the expressions in ``_builtin_exprs`` that still track the
    default whitespace characters.

    Example::

        # the initial default whitespace chars are " \n\t\r"
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # make newlines significant by skipping only space and <TAB>
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # push the new default into every expression that follows it
    tracking_default = (e for e in _builtin_exprs if e.copyDefaultWhiteChars)
    for builtin_expr in tracking_default:
        builtin_expr.whiteChars = set(chars)

421 

@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class used to wrap plain string literals that appear inside
    a parser expression.

    Example::

        # by default, literals are wrapped in Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # wrap literals in Suppress instead
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    # stored on the base class itself
    ParserElement._literalStringClass = cls

443 

@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Generate ``cls(obj, **class_kwargs)`` for each ``obj`` in ``seq``, lazily.

    Example::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    """
    for obj in seq:
        yield cls(obj, **class_kwargs)

455 

class DebugActions(NamedTuple):
    # Trio of optional debug callbacks; any of them may be None.
    # called with (instring, loc, expr, cache_hit)
    debug_try: typing.Optional[DebugStartAction]
    # called with (instring, startloc, endloc, expr, tokens, cache_hit)
    debug_match: typing.Optional[DebugSuccessAction]
    # called with (instring, loc, expr, exception, cache_hit)
    debug_fail: typing.Optional[DebugExceptionAction]

460 

def __init__(self, savelist: bool = False):
    """Set every per-expression attribute to its default.

    ``savelist`` is stored as ``saveAsList``.
    """
    self.parseAction: List[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    self.saveAsList = savelist
    self.skipWhitespace = True
    # private copy of the current class-wide default whitespace
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    # consulted when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs: List["ParserElement"] = []
    self.debug = False
    self.streamlined = False
    # lets subclasses that don't advance the parse index skip IndexError handling
    self.mayIndexError = True
    self.errmsg = ""
    # results names are modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug callbacks, none by default
    self.debugActions = self.DebugActions(None, None, None)
    # avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
    self.suppress_warnings_: List[Diagnostics] = []

488 

def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
    """
    Record ``warning_type`` as a diagnostic to suppress on this expression,
    and return the expression.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # this would normally raise a warning, but it is now suppressed
        print(base.parse_string("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self

504 

def visit_all(self):
    """Yield this expression and every sub-expression reachable through
    ``recurse()``, breadth-first and each only once.  Typically just for
    internal use.
    """
    pending = deque((self,))
    visited = set()
    while pending:
        node = pending.popleft()

        # recursive grammars reach the same node again; visit it only once
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node

521 

def copy(self) -> "ParserElement":
    """
    Return a shallow copy of this :class:`ParserElement` with its own
    parse-action and ignore-expression lists.  Handy for attaching different
    parse actions to the same parsing pattern.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    ``expr()`` is an equivalent form of ``expr.copy()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    # inside a method, ``copy`` is the module-level ``copy`` module
    duplicate = copy.copy(self)
    # separate lists, so changes to the copy leave the original alone
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        # re-read the default, which may have changed since self was built
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate

550 

def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> "ParserElement":
    """
    Give the matched tokens a name, so they can be read as a nested
    attribute of the returned parse results.

    Results names behave like dict keys: a later value overwrites an
    earlier one.  To keep every value captured under a name, pass
    ``list_all_matches`` = True.

    NOTE: ``set_results_name`` returns a *copy* of the original
    :class:`ParserElement` object, so that a basic element such as an
    integer can be referenced in several places under different names.

    The abbreviated syntax ``expr("name")`` is equivalent to
    ``expr.set_results_name("name")`` - see :class:`__call__`.  Use
    ``expr("name*")`` when ``list_all_matches`` is required.

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # either spelling of the flag switches list-all-matches on
    keep_all = listAllMatches or list_all_matches
    return self._setResultsName(name, keep_all)

584 

585 def _setResultsName(self, name, listAllMatches=False): 

586 if name is None: 

587 return self 

588 newself = self.copy() 

589 if name.endswith("*"): 

590 name = name[:-1] 

591 listAllMatches = True 

592 newself.resultsName = name 

593 newself.modalResults = not listAllMatches 

594 return newself 

595 

def set_break(self, break_flag: bool = True) -> "ParserElement":
    """
    Enter the Python pdb debugger just before this element is parsed.
    Pass ``break_flag`` as ``True`` to enable, ``False`` to disable.
    """
    if not break_flag:
        # undo one level of wrapping, if the parse method is wrapped
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod  # type: ignore [attr-defined, assignment]
        return self

    _parseMethod = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb

        # this call to pdb.set_trace() is intentional, not a checkin error
        pdb.set_trace()
        return _parseMethod(instring, loc, doActions, callPreParse)

    # keep the wrapped method so that disabling can restore it
    breaker._originalParseMethod = _parseMethod  # type: ignore [attr-defined]
    self._parse = breaker  # type: ignore [assignment]
    return self

617 

def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Replace this expression's parse actions with ``fns``; they are run when
    the expression matches successfully.

    Parse actions can convert data, perform extra validation, update
    external data structures, or enhance or replace the parsed tokens.
    Each ``fn`` is a callable taking 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens arrive as a ParseResults and may be modified in place: with
    list-style append, extend, and pop for the parsed list elements, and
    with dict-style item set and del for named results.  Tokens modified
    in place need not be returned.

    A parse action may instead return a replacement: another
    ``ParseResults`` object, or some entirely different object (common for
    data conversions).  A convenient way to build a new result is to define
    the values in a dict and pass it to :class:`ParseResults.from_dict`.

    Passing None as the only ``fn`` clears all previously added parse
    actions for this expression.

    Optional keyword arguments:

    - ``call_during_try`` = (default= ``False``) whether the parse action
      should also run during lookaheads and alternate testing.  Actions
      with side effects should only run once the parse is known to be
      successful; actions that perform additional validation should pass
      ``call_during_try=True`` so that the validation is included in the
      preliminary "try" parses.

    Note: by default, tabs in the input string are expanded before parsing
    starts.  See :class:`parse_string` for more information on parsing
    strings containing ``<TAB>`` s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and
    column positions within the parsed string.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None clears the actions
    if fns == (None,):
        self.parseAction = []
        return self

    if any(not callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")

    self.parseAction = list(map(_trim_arity, fns))
    # the snake_case keyword wins over the legacy camelCase one
    legacy_flag = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_flag)

    return self

705 

def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Append one or more parse actions to those already set on this
    expression.  See :class:`set_parse_action`.

    See examples in :class:`copy`.
    """
    self.parseAction += list(map(_trim_arity, fns))
    # an earlier request to run during "try" parses is never withdrawn
    legacy_flag = kwargs.get("callDuringTry", False)
    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", legacy_flag
    )
    return self

717 

def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception;
      when omitted (or None), the default message of
      :class:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    # Only coerce a message that was actually supplied: str(None) would
    # produce the literal text "None" and hide the default message.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self

754 

def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
    """
    Set the action to run when parsing fails at this expression.
    The fail action ``fn`` is called as ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    ``fn`` returns no value.  It may raise :class:`ParseFatalException`
    to stop parsing immediately.
    """
    self.failAction = fn
    return self

770 

771 def _skipIgnorables(self, instring: str, loc: int) -> int: 

772 if not self.ignoreExprs: 

773 return loc 

774 exprsFound = True 

775 ignore_expr_fns = [e._parse for e in self.ignoreExprs] 

776 last_loc = loc 

777 while exprsFound: 

778 exprsFound = False 

779 for ignore_fn in ignore_expr_fns: 

780 try: 

781 while 1: 

782 loc, dummy = ignore_fn(instring, loc) 

783 exprsFound = True 

784 except ParseException: 

785 pass 

786 # check if all ignore exprs matched but didn't actually advance the parse location 

787 if loc == last_loc: 

788 break 

789 last_loc = loc 

790 return loc 

791 

def preParse(self, instring: str, loc: int) -> int:
    """Return the location at which matching should start: ``loc`` moved
    past any ignorable expressions and then past any leading whitespace
    (when ``skipWhitespace`` is set)."""
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    end = len(instring)
    skippable = self.whiteChars
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc

803 

def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: consume nothing and return ``loc`` with a new
    empty token list."""
    no_tokens = []
    return loc, no_tokens

806 

def postParse(self, instring, loc, tokenlist):
    """Base implementation: return ``tokenlist`` unchanged."""
    unchanged = tokenlist
    return unchanged

809 

810 # @profile 

def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """Match this expression at ``loc`` without any result caching.

    Steps: optional ``preParse``, then ``parseImpl``, then ``postParse``;
    the tokens are wrapped in a :class:`ParseResults`; parse actions run
    when ``doActions`` or ``self.callDuringTry`` is set.  When
    ``self.debug`` is set the debug_try/debug_match/debug_fail callbacks
    are invoked, and ``self.failAction`` (if any) is called on failure.

    Returns ``(new_loc, results)``.  An IndexError from ``parseImpl`` or
    from a parse action is re-raised as :class:`ParseException`; all other
    exceptions propagate.
    """
    debugging = self.debug  # and doActions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # Bind before the try block: if preParse itself raises, the handler
        # below still needs a location to report, and must not mask the
        # real error with an UnboundLocalError.
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    # running off the end of the input is a normal parse failure
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # same matching steps, without the debug/fail-action bookkeeping
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a returned value other than the same results object
                    # replaces the tokens
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

912 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the location just past it.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param raise_fatal: if true, a ``ParseFatalException`` is re-raised
        unchanged; otherwise it is replaced by a plain ``ParseException``
        reported at ``loc``
    :param do_actions: forwarded to ``_parse`` as ``doActions``
    :returns: the first item of the ``_parse`` result (the end location)
    """
    try:
        end_loc = self._parse(instring, loc, doActions=do_actions)[0]
    except ParseFatalException:
        if not raise_fatal:
            # downgrade the fatal error so that callers can treat it as an
            # ordinary failed lookahead
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return end_loc

927 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    Delegates to ``try_parse``; a ``ParseException`` or ``IndexError``
    raised by the attempt is reported as ``False``.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
        return True
    except (ParseException, IndexError):
        return False

935 

# Memo table (and its lock) used for left-recursion in Forward references.
# ``enable_left_recursion()`` replaces ``recursion_memos`` with a bounded or
# unbounded memo object, and ``reset_cache()`` clears it.
recursion_lock = RLock()
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
] = {}

941 

class _CacheType(dict):
    """
    Placeholder ``dict`` subclass that exists to help type checking.

    It declares the members that ``_parseCache`` uses on
    ``ParserElement.packrat_cache``: ``get``, ``set`` and ``not_in_cache``.
    The methods here are stubs; ``enable_packrat()`` installs the object
    that is actually used as the cache.
    """

    # sentinel that ``_parseCache`` compares (by identity) against the value
    # returned from ``get`` to detect a cache miss
    not_in_cache: bool

    def get(self, *args): ...

    def set(self, *args): ...

952 

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = (
    _CacheType()
)  # set later by enable_packrat(); this is here so that reset_cache() doesn't fail
# guards every lookup/update of ``packrat_cache`` performed in ``_parseCache``
packrat_cache_lock = RLock()
# [hits, misses] counters, indexed by the HIT/MISS constants in ``_parseCache``
packrat_cache_stats = [0, 0]

959 

960 # this method gets repeatedly called during backtracking with the same arguments - 

961 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 

def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Memoizing ("packrat") wrapper around ``_parseNoCache``.

    Results are cached under ``(self, instring, loc, callPreParse, doActions)``.
    A successful parse is stored as ``(end_loc, tokens_copy, start_loc)``; a
    failed parse stores a fresh copy of the exception (without traceback).
    On a cache hit the stored exception is re-raised, or a copy of the stored
    tokens is returned, and any debug actions are invoked with
    ``cache_hit=True``.

    :returns: ``(end_loc, tokens)``, the same shape as ``_parseNoCache``
    :raises ParseBaseException: if the (possibly cached) parse attempt failed
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # remember the start location as third item so that a later
                # cache hit can report it to the debug match action
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            # NOTE(review): the TypeError guards below presumably tolerate
            # user debug actions that do not accept ``cache_hit`` - confirm
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(Tuple[int, ParseResults, int], value)
            # cached layout is (end location, tokens, start location)
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # pass (start, end), the same order _parseNoCache uses
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result

1010 

# Default parse entry point is the non-memoizing implementation;
# enable_packrat() rebinds this to _parseCache and disable_memoization()
# restores it.
_parse = _parseNoCache

1012 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters (in place), and
    empty the left-recursion memo table.
    """
    stats = ParserElement.packrat_cache_stats
    ParserElement.packrat_cache.clear()
    stats[:] = [0 for _ in stats]
    ParserElement.recursion_memos.clear()

1020 

# Feature flags: set by enable_packrat() / enable_left_recursion() and
# cleared by disable_memoization(); the two features are mutually exclusive.
_packratEnabled = False
_left_recursion_enabled = False

1023 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing and discard their memoized data.

    Calling this when neither feature is enabled is harmless, so it can be
    used before activating Packrat or Left Recursion to clear any previous
    settings.
    """
    ParserElement.reset_cache()
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False
    # go back to the non-memoizing parse implementation
    ParserElement._parse = ParserElement._parseNoCache

1037 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, left-recursive :class:`Forward`
    elements are matched repeatedly with a fixed recursion depth that is
    increased step by step until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-evaluated during backtracking. Programs whose parse
    actions depend on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements. Values that are not positive raise
      ``NotImplementedError``.
    - ``force`` - (default=``False``) - call ``disable_memoization()`` first,
      clearing any previous, conflicting settings.

    Bounded Recursion parsing is similar but not identical to Packrat parsing,
    so the two cannot be combined; without ``force=True`` a ``RuntimeError``
    is raised if Packrat is already enabled.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError(f"Memo size of {cache_size_limit}")

    ParserElement.recursion_memos = memo  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True

1085 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which memoizes parse attempts.
    Grammars frequently retry the same expression at the same string
    location; with packrat enabled such repeats return the cached outcome
    instead of re-running the parsing/validating code. Both successful
    results and parsing exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.
    - ``force`` - (default= ``False``) - call ``disable_memoization()`` first,
      clearing any previous, conflicting settings.

    This speedup may break existing programs that use parse actions that
    have side-effects. For this reason, packrat parsing is disabled when
    you first import pyparsing. To activate the packrat feature, your
    program must call the static method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar but not identical to Bounded Recursion parsing,
    so the two cannot be combined; without ``force=True`` a ``RuntimeError``
    is raised if Bounded Recursion is already enabled. If packrat is already
    enabled (and ``force`` is false) the call does nothing.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if not ParserElement._packratEnabled:
        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )  # type: ignore[assignment]
        ParserElement._parse = ParserElement._parseCache

1134 

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This function is intended as the primary interface to the
    client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This
    is also equivalent to ending the grammar with :class:`StringEnd`\\ ().

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_match_all = parse_all or parseAll

    # every top-level parse starts from empty memo tables
    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if must_match_all:
            # skip trailing whitespace/ignorables, then require end of input
            loc = self.preParse(instring, loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    return tokens

1203 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    :param instring: the string to scan
    :param max_matches: stop after this many matches
    :param overlap: if true, resume scanning inside the previous match
    :param debug: if true, print each match as a dict of tokens/start/end
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of
        the two limits is used
    :raises ParseBaseException: propagated from the expression; the internal
        traceback is stripped unless ``ParserElement.verbose_stacktrace`` is set

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        skipped_loc = preparseFn(instring, loc)
                        # NOTE(review): when leading whitespace/ignorables
                        # were skipped, scanning resumes at the END of the
                        # match (nextLoc), not at skipped_loc - confirm this
                        # is the intended overlap behavior
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # zero-width match: step forward to guarantee progress
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1294 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites the matched text using the
    tokens returned by parse actions. To use ``transform_string``, define a
    grammar and attach a parse action to it that modifies the returned token
    list. ``transform_string()`` then scans the target string for matches,
    substitutes each matched span with its (modified) tokens, and returns the
    resulting string. Text between matches is copied through unchanged.

    Note that this method sets ``keepTabs`` to ``True`` on the expression and
    does not restore it afterwards.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    cursor = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # untouched text between the previous match and this one
            pieces.append(instring[cursor:start])
            cursor = end

            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.as_list())
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)

        pieces.append(instring[cursor:])
        kept = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(kept))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1344 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens of
    every match into a single :class:`ParseResults`. May be called with optional
    ``max_matches`` argument, to clip searching after 'n' matches are found
    (``maxMatches`` is the pre-PEP8 spelling; the smaller limit wins).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            toks for toks, _start, _end in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1384 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits ``instring`` wherever this expression matches.
    ``maxsplit`` limits the number of splits performed. If
    ``include_separators`` (default= ``False``; ``includeSeparators`` is the
    pre-PEP8 spelling) is set, the first token of each separator match is
    yielded between the surrounding pieces.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator (possibly the empty string)
    yield instring[cursor:]

1416 

def __add__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator - returns :class:`And`. Strings added to a
    :class:`ParserElement` are converted with ``_literalStringClass``
    (:class:`Literal`\\ s by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    return NotImplemented

1452 

def __radd__(self, other) -> "ParserElement":
    """
    Reflected ``+``: used when the left operand is not a :class:`ParserElement`.
    ``... + expr`` becomes a :class:`SkipTo` (named ``"_skipped*"``) followed by ``expr``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other + self
    return NotImplemented

1465 

def __sub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator: like ``+``, but an ``And._ErrorStop()``
    marker is inserted between the two operands.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return self + And._ErrorStop() + other
    return NotImplemented

1475 

def __rsub__(self, other) -> "ParserElement":
    """
    Reflected ``-``: used when the left operand is not a :class:`ParserElement`.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other - self
    return NotImplemented

1485 

def __mul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` may be used wherever ``None`` is accepted.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    :raises ValueError: for a negative count, or a maximum below the minimum
    """
    # normalize a leading Ellipsis to a minimum of zero
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        required, optional = other, 0
    elif isinstance(other, tuple):
        bounds = tuple(None if item is Ellipsis else item for item in other)
        # pad short tuples with None and ignore anything past two items
        low, high = (bounds + (None, None))[:2]
        if low is None:
            low = 0
        if not isinstance(low, int):
            return NotImplemented
        if high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        if not isinstance(high, int):
            return NotImplemented
        required, optional = low, high - low
    else:
        return NotImplemented

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    if not optional:
        return self if required == 1 else And([self] * required)

    def nested_optionals(count):
        # Opt(self + Opt(self + ...)) nested ``count`` levels deep
        if count > 1:
            return Opt(self + nested_optionals(count - 1))
        return Opt(self)

    if not required:
        return nested_optionals(optional)

    mandatory = self if required == 1 else And([self] * required)
    return mandatory + nested_optionals(optional)

1565 

def __rmul__(self, other) -> "ParserElement":
    """
    Reflected ``*`` (e.g. ``3 * expr``); delegates directly to ``__mul__``.
    """
    return self.__mul__(other)

1568 

def __or__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a ``_PendingSkip`` with ``must_skip=True``, and
    ``expr | ""`` returns ``Opt(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented

1584 

def __ror__(self, other) -> "ParserElement":
    """
    Reflected ``|``: used when the left operand is not a :class:`ParserElement`.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other | self
    return NotImplemented

1594 

def __xor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator - returns :class:`Or`.
    Strings are converted with ``_literalStringClass`` first.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    return NotImplemented

1604 

def __rxor__(self, other) -> "ParserElement":
    """
    Reflected ``^``: used when the left operand is not a :class:`ParserElement`.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    return NotImplemented

1614 

def __and__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator - returns :class:`Each`.
    Strings are converted with ``_literalStringClass`` first.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    return NotImplemented

1624 

def __rand__(self, other) -> "ParserElement":
    """
    Reflected ``&``: used when the left operand is not a :class:`ParserElement`.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    return NotImplemented

1634 

def __invert__(self) -> "ParserElement":
    """
    Implementation of ``~`` operator: wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1640 

# Disable __iter__ explicitly: because __getitem__ is defined (for repetition
# syntax), Python's legacy sequence protocol would otherwise try to iterate a
# parser element by calling __getitem__ with successive integers.
__iter__ = None

1644 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream. If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    :raises TypeError: if more than two index arguments are given
    """

    stop_on = NoMatch()
    has_stop_on = False
    if isinstance(key, slice):
        # expr[count : end_expr]
        stop_on = key.stop
        key = ... if key.start is None else key.start
        has_stop_on = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max : end_expr]
        trailing = key[1]
        key, stop_on = (key[0], trailing.start), trailing.stop
        has_stop_on = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop_on:
        repeated.stopOn(stop_on)

    return repeated

1704 

def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)

1724 

def suppress(self) -> "ParserElement":
    """
    Return this expression wrapped in :class:`Suppress`, so that its output is
    suppressed; handy for keeping punctuation out of the returned results.
    """
    wrapped = Suppress(self)
    return wrapped

1731 

def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern, and return ``self``.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
        this implementation does not use the argument itself
    """
    self.skipWhitespace = True
    return self

1741 

def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern, and return ``self``. This is
    normally only used internally by the pyparsing module, but may be needed
    in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any);
        this implementation does not use the argument itself
    """
    self.skipWhitespace = False
    return self

1752 

def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Replace the whitespace characters used by this expression.

    Whitespace skipping is switched on, ``chars`` is stored as a new ``set``,
    and ``copy_defaults`` is recorded in ``copyDefaultWhiteChars``.
    Returns ``self``.
    """
    white_chars = set(chars)
    self.skipWhitespace = True
    self.whiteChars = white_chars
    self.copyDefaultWhiteChars = copy_defaults
    return self

1763 

def parse_with_tabs(self) -> "ParserElement":
    """
    Override the default behavior of expanding ``<TAB>`` s to spaces before
    the input string is parsed. Call this before ``parse_string`` whenever the
    grammar contains elements that match ``<TAB>`` characters.

    Sets ``keepTabs`` on this expression and returns ``self``.
    """
    setattr(self, "keepTabs", True)
    return self

1772 

def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Register an expression to be skipped over (comments, for instance) during
    pattern matching. May be called more than once to register several
    comment or other ignorable patterns.

    Example::

        patt = Word(alphas)[...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    # plain strings are promoted to a suppressed expression
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        # wrap a copy, so the caller's expression is left untouched
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        # already suppressed - add it only once
        self.ignoreExprs.append(other)
    return self

1798 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Install custom callbacks for the debugging messages shown during pattern
    matching, and switch debugging on for this expression:

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

    Any action passed as a false value is replaced by the corresponding
    default debug action.
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = True
    return self

1824 

def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Switch the display of debugging messages during pattern matching on or off.
    Pass ``flag`` as ``True`` to enable, ``False`` to disable.
    Pass ``recurse`` as ``True`` to apply the flag to every expression returned by
    ``visit_all()`` (this expression and all sub-expressions).

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    This is the output of the default debug actions - custom debug actions can be
    installed with :class:`set_debug_actions`. Before each attempt
    to match the ``wd`` expression, the message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. A successful parse then shows a ``"Matched"`` message, a failed one an ``"Exception raised"``
    message. Note also the use of :class:`set_name` to give the expression a human-readable name,
    which makes debugging and exception messages easier to follow - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if recurse:
        # apply the flag individually to every visited expression
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling: install the default actions, which also sets self.debug
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

1877 

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression; built on first access by
    ``_generateDefaultName()`` and cached in ``_defaultName`` afterwards.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1883 

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The returned string is cached by the ``default_name`` property, which only
    calls this method while ``_defaultName`` is ``None``.
    """

1889 

def set_name(self, name: str) -> "ParserElement":
    """
    Give this expression a name, for clearer debugging and exception messages.
    The expression's ``errmsg`` is rebuilt from the new name, and debugging is
    switched on as well when ``__diag__.enable_debug_on_named_expressions`` is set.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # self.name now resolves to the custom name just stored
    expected = f"Expected {self.name}"
    self.errmsg = expected
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.set_debug()
    return self

1907 

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined name when one has been
    set, otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

1912 

def __str__(self) -> str:
    """Return the expression's ``name``."""
    display_name = self.name
    return display_name

1915 

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text

1918 

def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard the cached default name,
    so that it is regenerated on next access. Returns ``self``.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1923 

def recurse(self) -> List["ParserElement"]:
    """Return the sub-expressions of this element; always a new empty list here."""
    return list()

1926 

def _checkRecursion(self, parseElementList):
    """
    Run ``_checkRecursion`` on every element returned by ``recurse()``,
    passing each a copy of ``parseElementList`` extended with ``self``.
    The caller's list is not modified.
    """
    trail = parseElementList[:] + [self]
    for child in self.recurse():
        child._checkRecursion(trail)

1931 

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: a :class:`DeprecationWarning` is issued on every call, before
    the recursion check is run. ``validateTrace`` is not used.
    """
    deprecation_note = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(deprecation_note, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

1942 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run the parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, or a
    filename; a filename is opened with ``encoding``, read in full, and
    closed before parsing starts. The text is parsed with ``parse_string``,
    requiring a full match if either ``parse_all`` or ``parseAll`` is true.
    """
    require_all = parseAll or parse_all
    try:
        stream = typing.cast(TextIO, file_or_filename)
        text = stream.read()
    except AttributeError:
        # not readable as a stream - treat it as a filename
        filename = typing.cast(str, file_or_filename)
        with open(filename, "r", encoding=encoding) as handle:
            text = handle.read()

    try:
        return self.parse_string(text, require_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1972 

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object compares equal
    - a string compares equal if this expression matches it in full
    - another :class:`ParserElement` compares equal if both have equal ``vars()``
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

1981 

def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity

1984 

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of a parser against a test string; returns ``True`` if the
    parse succeeds and ``False`` if it raises a parse exception. Useful for
    small inline tests of sub expressions while a larger parser is being built.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests;
      a full match is only required if ``parse_all`` and ``parseAll`` are both true

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    require_all = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_all)
    except ParseBaseException:
        return False
    else:
        return True

2008 

def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are alternate spellings of the corresponding
    snake_case arguments; each pair is merged into a single setting before the tests run.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list with one
    ``(test_string, parse_results_or_exception)`` pair for each test that was run

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each snake_case argument with its camelCase spelling
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a single multiline string: split it into stripped lines, one test per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
    comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    # comment lines collected since the last test; shown above the next test
    comments: List[str] = []
    success = True
    # matches a literal backslash-n in a test string (outside quoted strings) and
    # replaces it with a real newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # a comment line, or a blank line directly following collected comments,
        # is saved for display and not run as a test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # a failure only keeps the run successful when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # NOTE(review): unlike the other result dumps below, this call
                        # does not pass full=fullDump - confirm whether that is intended
                        out.append(result.dump())
                except Exception as e:
                    # the callback itself failed: show the plain results plus the error
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2208 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for this parser and write it out as HTML.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML; a ``str`` or ``Path`` is opened for writing as UTF-8, anything
      else is written to directly
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as import_err:
        install_hint = "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        raise Exception(install_hint) from import_err

    self.streamline()

    diagram = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create/overwrite that file
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(diagram, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(diagram, embed=embed, **kwargs))

2264 

# Compatibility synonyms
# Each camelCase name below is kept as an alternate spelling of the snake_case
# method it is paired with. Most are built with replaced_by_pep8() (imported
# from .util), which is given the legacy name and the method it stands for.
# fmt: off
inlineLiteralsUsing = replaced_by_pep8("inlineLiteralsUsing", inline_literals_using)
setDefaultWhitespaceChars = replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
)
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
enableLeftRecursion = replaced_by_pep8("enableLeftRecursion", enable_left_recursion)
enablePackrat = replaced_by_pep8("enablePackrat", enable_packrat)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
# these three are bound directly to the same object, without the wrapper
canParseNext = can_parse_next
resetCache = reset_cache
defaultName = default_name
# fmt: on

2297 

2298 

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.must_skip = must_skip
        self.anchor = expr

    def _generateDefaultName(self) -> str:
        # render "anchor + Empty" and show the Empty as the pending "..."
        rendered = str(self.anchor + Empty())
        return rendered.replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        # adding the next expression supplies the SkipTo target for the pending '...'
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def drop_empty_skip(t):
            # nothing (or only an empty string) was skipped: remove the
            # leading token and the results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def report_missing_anchor(t):
            # the anchor was not found and the last skipped text is empty:
            # replace the results name with a "missing" note
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(drop_empty_skip)
        without_anchor = skipper().add_parse_action(report_missing_anchor)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a placeholder that was never followed by a target cannot parse anything
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2338 

2339 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self):
        # tokens are always initialized with savelist turned off
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # by default a token is named after its own class
        cls = type(self)
        return cls.__name__

2350 

2351 

class NoMatch(Token):
    """
    A token that never matches; every parse attempt raises a
    :class:`ParseException`.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2365 

2366 

class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: select a subclass with optimized parseImpl
        # (only when Literal itself is being instantiated; subclasses are
        # created as requested)
        impl_cls = cls
        if cls is Literal:
            match_string = matchString or match_string
            if not match_string:
                impl_cls = Empty
            elif len(match_string) == 1:
                impl_cls = _SingleCharLiteral

        return super().__new__(impl_cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        literal = matchString or match_string
        self.match = literal
        self.matchLen = len(literal)
        # a slice, so an empty literal gives "" rather than an IndexError
        self.firstMatchChar = literal[:1]
        # self.name is derived from self.match, which is set above
        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # compare the first character before making the full startswith check
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2418 

2419 

class Empty(Literal):
    """
    An empty token; it matches at any location and returns no tokens.
    """

    def __init__(self, match_string="", *, matchString=""):
        # the arguments are accepted for signature compatibility only;
        # the literal is always the empty string
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, doActions=True):
        # succeed without advancing
        no_tokens = []
        return loc, no_tokens

2435 

2436 

class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl compares only the single character at loc
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2442 

2443 

# Set Literal as ParserElement's literal-string class, now that Literal is defined.
# NOTE(review): presumably this is the class used to convert plain strings that
# appear in expressions - confirm against ParserElement's use of the attribute.
ParserElement._literalStringClass = Literal

2445 

2446 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"; an empty string means no character counts as an identifier
      character
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    ``matchString`` and ``identChars`` are alternate spellings of
    ``match_string`` and ``ident_chars``; if both spellings of an argument
    are given, the camelCase one is used.

    Raises ``ValueError`` if the keyword string is empty.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # Test against None rather than truthiness: an explicit empty string
        # is a valid setting and must not fall back to the other spelling or
        # to the defaults.
        if identChars is None:
            identChars = ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done in upper case on both sides
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # Match the keyword text at loc, then require that the characters
        # immediately before and after it are not identifier characters.
        # errmsg/errloc are refined when the text matched but a boundary
        # check failed.
        errmsg = self.errmsg
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    errmsg += ", was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # legacy camelCase spelling of set_default_keyword_chars
    setDefaultKeywordChars = set_default_keyword_chars

2560 

2561 

class CaselessLiteral(Literal):
    """
    Token that matches a specified string without regard to letter case.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        as_given = matchString or match_string
        # the upper-cased form is what gets compared against the input
        super().__init__(as_given.upper())
        # Preserve the defining literal.
        self.returnString = as_given
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString

2587 

2588 

class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    ``matchString`` and ``identChars`` are alternate spellings of
    ``match_string`` and ``ident_chars``; if both spellings of an argument
    are given, the camelCase one is used.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # Test against None rather than truthiness, so that an explicit empty
        # identChars string is passed through instead of being discarded.
        if identChars is None:
            identChars = ident_chars
        match_string = matchString or match_string
        super().__init__(match_string, identChars, caseless=True)

2612 

2613 

class CloseMatch(Token):
    """A :class:`Literal`-like token that also accepts "close" matches,
    i.e. input that differs from the target in at most 'n' character
    positions.

    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    A successful parse yields the matched input text plus these named
    results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    An empty ``mismatches`` list means the input matched exactly.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        super().__init__()
        # An explicit ``max_mismatches`` overrides the legacy keyword.
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        return "{}:{!r}".format(type(self).__name__, self.match_string)

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with ``match_string`` character by
        character, tolerating up to ``maxMismatches`` differences.
        """
        start = loc
        target = self.match_string
        end = start + len(target)

        # Not enough input left to hold the whole target string.
        if end > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        fold_case = self.caseless
        allowed = self.maxMismatches
        mismatches = []
        last_pos = 0

        for last_pos, (src, mat) in enumerate(zip(instring[loc:end], target)):
            if fold_case:
                src, mat = src.lower(), mat.lower()

            if src != mat:
                mismatches.append(last_pos)
                # Give up as soon as the mismatch budget is exceeded.
                if len(mismatches) > allowed:
                    raise ParseException(instring, loc, self.errmsg, self)

        loc = start + last_pos + 1
        results = ParseResults([instring[start:loc]])
        results["original"] = target
        results["mismatches"] = mismatches
        return loc, results

2699 

2700 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1);
      values below 1 raise ``ValueError``
    - ``max`` - maximum number of characters to match (default=0,
      meaning no maximum)
    - ``exact`` - exact number of characters to match (default=0,
      meaning not specified); when positive it overrides ``min`` and ``max``
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # The camelCase keywords are the legacy spellings; a truthy legacy
        # value takes precedence over the snake_case argument.
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        # Character sets are stored as sets for fast membership tests; the
        # "...Orig" attributes hold the same characters as sorted strings.
        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # No (remaining) body characters: body shares the initial set.
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # Recorded before ``exact`` may rebind ``max`` below, so it reflects
        # only an explicitly passed ``max`` argument.
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # ``exact`` overrides both bounds; the locals are rebound too
            # because the regex construction below reads ``min``/``max``.
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        # (only attempted when neither character set contains a space)
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # Single character class: the repeat count covers the
                # whole word, leading character included.
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # Distinct leading and body classes: the leading fragment
                # matches one character, so body counts are reduced by one.
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # Fall back to the character-by-character ``parseImpl``.
                self.re = None  # type: ignore[assignment]
            else:
                # Regex compiled: route parsing through the regex variant
                # by shadowing ``parseImpl`` on this instance.
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]

    def _generateDefaultName(self) -> str:
        """Build a default name of the form ``W:(init[, body])`` followed by
        an optional ``{min,max}`` length suffix.
        """

        def charsAsStr(s):
            # Collapse to range notation and truncate long sets with "...".
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # Exactly one character: drop the leading "W:" prefix.
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, doActions=True):
        """Character-by-character matcher, used when ``__init__`` did not
        install the regex-based variant.

        Returns ``(new_loc, matched_text)`` or raises ``ParseException``.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            # too short
            throwException = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # an explicit ``max`` was given and the word continues past it
            throwException = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in bodychars)
            or (loc < instrlen and instring[loc] in bodychars)
        ):
            # keyword mode: must not be adjacent to another body character
            throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, doActions=True):
        """Regex-based matcher installed by ``__init__`` when the word could
        be expressed as a compiled regular expression.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

2950 

2951 

class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)`` for
    matching exactly one character drawn from a string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # Legacy camelCase keywords win when they carry a truthy value.
        excluded = excludeChars if excludeChars else exclude_chars
        keyword_mode = asKeyword if asKeyword else as_keyword
        super().__init__(
            charset,
            exact=1,
            as_keyword=keyword_mode,
            exclude_chars=excluded,
        )

2972 

2973 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Parameters:

    - ``pattern`` - a non-empty pattern string, or an already-compiled RE
      object (anything exposing ``pattern`` and ``match`` attributes)
    - ``flags`` - flags passed to ``re.compile()`` when ``pattern`` is a string
    - ``as_group_list`` - return the match's ``groups()`` tuple instead of
      the matched text (default=``False``)
    - ``as_match`` - return the match object itself (default=``False``);
      takes precedence over ``as_group_list`` when both are set

    A string pattern is compiled lazily, on first use of the ``re``
    property; an invalid pattern raises ``ValueError`` at that point.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        when ``pattern`` is neither a string nor a compiled RE object.
        """
        super().__init__()
        # Legacy camelCase keywords are OR-ed with the snake_case arguments.
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # Compilation is deferred to the ``re`` property.
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # Duck-typed compiled pattern (stdlib ``re`` or a compatible module).
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # Select the result-shaping parse method by shadowing ``parseImpl``
        # on the instance; ``asMatch`` is applied last and therefore wins.
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]

    @cached_property
    def re(self):
        """The compiled pattern, compiled on first access for string patterns.

        Raises ``ValueError`` (chained to the underlying ``re.error``) when
        the pattern string is not a valid regular expression.
        """
        if self._re:
            return self._re

        try:
            return re.compile(self.pattern, self.flags)
        except re.error as err:
            # Chain the original error so the precise regex problem
            # (message and position) stays visible in the traceback.
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    # NOTE(review): if the base-class initializer assigns ``mayReturnEmpty``
    # on the instance, that value may shadow this property - confirm.
    @cached_property
    def mayReturnEmpty(self):
        """Whether the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

    def parseImpl(self, instring, loc, doActions=True):
        """Default matcher: returns the matched text as ``ParseResults``,
        with every named group added as a named result.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Matcher used with ``as_group_list=True``: returns ``groups()``."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Matcher used with ``as_match=True``: returns the match object."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or when
        ``repl`` is callable and the expression uses ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            # The token is a match object: expand the template against it.
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # The token is the matched text: re-apply the pattern to it.
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3128 

3129 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Leading and trailing whitespace is stripped from the quote strings;
    a quote string that is empty after stripping raises ``ValueError``.

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # Maps a two-character escape as written in the source text to the
    # whitespace character it stands for.
    ws_map = {r"\t": "\t", r"\n": "\n", r"\f": "\f", r"\r": "\r"}

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        super().__init__()
        # Merge legacy camelCase keywords with their snake_case equivalents.
        # The two default-True flags are AND-ed, so passing False through
        # either spelling disables the feature.
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # Allow proper prefixes of a multi-character end quote inside
            # the body, as long as the rest of the end quote does not follow.
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # Single-line strings additionally exclude newline characters.
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            # Scanner used by parseImpl to walk the unquoted body one
            # item at a time (whitespace escape, escaped char, or any char).
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as err:
            # Name the right class in the message and keep the underlying
            # regex error attached as the cause.
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to QuotedString"
            ) from err

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(new_loc, text)``, where ``text`` is the unquoted and
        unescaped body when ``unquote_results`` is set, otherwise the raw
        matched text including the quotes. Raises ``ParseException`` when
        no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        if instring[loc] == self.first_quote_char:
            result = self.re_match(instring, loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped characters
                        else match.group(2)[-1] if match.group(2)
                        # match group 3 matches any character
                        else match.group(3)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3350 

3351 

class CharsNotIn(Token):
    """Token for matching runs of characters that are *not* in a given
    set. Whitespace is included in the match unless it is listed in the
    exclusion set (see example). Construct with a string holding every
    disallowed character, plus optional minimum, maximum, and/or exact
    lengths. ``min`` defaults to 1 (values < 1 are rejected); ``max`` and
    ``exact`` default to 0, meaning no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # An exact length pins both bounds.
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # The collapsed form only decides whether to truncate; the text
        # shown is taken from the raw exclusion string.
        collapsed = _collapse_string_to_ranges(self.notChars)
        shown = self.notChars
        if len(collapsed) > 16:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until an excluded character,
        the maximum length, or the end of input is reached.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        loc = start + 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3430 

3431 

class White(Token):
    """Token that matches whitespace explicitly. pyparsing grammars
    normally skip whitespace; use this class when particular whitespace
    is significant. Construct with a string of the whitespace characters
    to match (default ``" \\t\\r\\n"``). The optional ``min``, ``max``,
    and ``exact`` arguments work as described for the :class:`Word` class.
    """

    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # Known whitespace characters that are not being matched stay
        # skippable ahead of this token.
        skippable = [c for c in self.whiteStrs if c not in self.matchWhite]
        self.set_whitespace_chars("".join(skippable), copy_defaults=True)
        self.mayReturnEmpty = True
        self.errmsg = f"Expected {self.name}"

        if exact > 0:
            # An exact length pins both bounds.
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        return "".join(labels)

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters at ``loc``."""
        allowed = self.matchWhite
        if instring[loc] not in allowed:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc = start + 1
        while loc < limit and instring[loc] in allowed:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3507 

3508 

class PositionToken(Token):
    """Common base for the position-testing tokens defined after it
    (such as :class:`GoToColumn` and :class:`LineStart`).
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

3514 

3515 

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; handy
    when scraping tabular reports.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and whitespace, stopping early once
        the target column is reached.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1

        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column; fail if
        the current position is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        newloc = loc + self.col - current
        return newloc, instring[loc:newloc]

3548 

3549 

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # keep a snapshot of the whitespace set before "\n" is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used only to skip the remaining whitespace chars
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring: str, loc: int) -> int:
        """Skip non-newline whitespace (via ``self.skipper``) and, when
        ``"\n"`` was in the original whitespace set, any newlines as well;
        location 0 is returned untouched.
        """
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)
        if "\n" not in self.orig_whiteChars:
            return pos

        # step over each newline, then skip the whitespace that follows it
        while instring[pos : pos + 1] == "\n":
            pos = skip(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is in column 1, otherwise
        raise ``ParseException``.
        """
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3597 

3598 

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # "\n" is what this token matches, so it must not be skipped as whitespace
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returning it as the token) or the
        exact end of the input (returning no tokens); anything else raises
        ``ParseException``.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3620 

3621 

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is 0, or when ``loc`` is where
        pre-parsing from position 0 ends (i.e. everything before ``loc`` is
        skippable); otherwise raise ``ParseException``.
        """
        at_start = loc == 0 or loc == self.preParse(instring, 0)
        if not at_start:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3637 

3638 

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is at or beyond the end of
        ``instring``; raise ``ParseException`` when input remains.

        When ``loc`` is exactly the end of the string the returned location
        is ``loc + 1``; when it is already past the end, ``loc`` is returned
        unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end: the three comparisons are exhaustive, so no further
        # fallback is needed after this point
        return loc, []

3657 

3658 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        """Store the set of word characters.

        ``wordChars`` is the legacy keyword spelling; when it is given a
        non-default value it takes precedence over ``word_chars``.
        """
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at location 0, or when the previous
        character is not a word character and the current one is;
        otherwise raise ``ParseException``.
        """
        if loc != 0:
            preceded_by_word_char = instring[loc - 1] in self.wordChars
            if preceded_by_word_char or instring[loc] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3683 

3684 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        """Store the set of word characters and disable whitespace skipping.

        ``wordChars`` is the legacy keyword spelling; when it is given a
        non-default value it takes precedence over ``word_chars``.
        """
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        # leading whitespace must not be skipped before testing the position
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at or past the end of the input, or when
        the current character is not a word character and the previous one
        is; otherwise raise ``ParseException``.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # NOTE(review): at loc == 0, instring[loc - 1] wraps around to the
            # last character of the input - confirm this is intended
            followed_by_word_char = instring[loc] in self.wordChars
            if followed_by_word_char or instring[loc - 1] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3710 

3711 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepted forms are a single string (wrapped with the literal string
        class), a single ``ParserElement``, a generator or other iterable
        (strings inside it are wrapped with the literal string class), or any
        other object, which is stored as a one-element list if it cannot be
        turned into a list.
        """
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if any(isinstance(expr, str_type) for expr in exprs):
                exprs = (
                    self._literalStringClass(e) if isinstance(e, str_type) else e
                    for e in exprs
                )
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions, reset the cached
        default name, and return ``self``.
        """
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the originals are not modified
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so the originals are not modified
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression on this element and
        on every contained expression; a ``Suppress`` that is already in
        ``self.ignoreExprs`` is not registered again. Returns ``self``.
        """
        if not isinstance(other, Suppress) or other not in self.ignoreExprs:
            super().ignore(other)
            for e in self.exprs:
                # propagate the entry the base class just appended
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this element and its contained expressions, flattening
        one level of same-class nesting where that is safe. Returns ``self``.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emit a ``DeprecationWarning``, validate each contained
        expression, then run the recursion check on this element.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this element whose contained expressions are
        themselves copies.
        """
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, first warning (when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and not suppressed) if a contained expression already has a results
        name.
        """
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, listAllMatches)

        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning is enough, stop at the first collision
                break

        return super()._setResultsName(name, listAllMatches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

3881 

3882 

class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once ``And.parseImpl`` passes it, failures of later
        elements are raised as ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence, replacing each ``...`` placeholder with a
        ``SkipTo`` of the element that follows it. A trailing ``...`` raises
        ``Exception``.
        """
        elements: List[ParserElement] = list(exprs_arg)
        if elements and Ellipsis in elements:
            expanded = []
            last_index = len(elements) - 1
            for idx, item in enumerate(elements):
                if item is not Ellipsis:
                    expanded.append(item)
                elif idx < last_index:
                    # let ``Empty() + next`` normalize the following element
                    # (e.g. a plain string), then skip up to it
                    skipto_arg: ParserElement = typing.cast(
                        ParseExpression, (Empty() + elements[idx + 1])
                    ).exprs[-1]
                    expanded.append(SkipTo(skipto_arg)("_skipped*"))
                else:
                    raise Exception("cannot construct And with sequence ending in ...")
            elements[:] = expanded

        super().__init__(elements, savelist)

        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            if isinstance(self.exprs[0], White):
                self.skipWhitespace = False
            else:
                # take the whitespace handling from the leading element
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips with their following element, run the base
        class streamlining, attach a parse action to the element preceding
        any ``IndentedBlock``, and recompute ``mayReturnEmpty``.
        """

        def ends_in_pending_skip(candidate) -> bool:
            return bool(
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_in_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            for idx, item in enumerate(self.exprs[:-1]):
                if item is deleted_expr_marker:
                    continue
                if ends_in_pending_skip(item):
                    item.exprs[-1] = item.exprs[-1] + self.exprs[idx + 1]
                    self.exprs[idx + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            visited = set()
            while id(cur) not in visited:
                visited.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                first_sub = next(iter(cur.recurse()), None)
                if first_sub is None:
                    break
                cur = typing.cast(ParserElement, first_sub)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order, accumulating tokens;
        returns ``(loc, tokens)``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, collected = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        past_error_stop = False
        for expr in self.exprs[1:]:
            # exact type test, not isinstance
            if type(expr) is And._ErrorStop:
                past_error_stop = True
                continue
            if not past_error_stop:
                loc, tokens = expr._parse(instring, loc, doActions)
            else:
                try:
                    loc, tokens = expr._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            collected += tokens
        return loc, collected

    def __iadd__(self, other):
        """Append ``other`` in place (strings are wrapped with the literal
        string class); returns ``NotImplemented`` for unsupported operands.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr._checkRecursion(trail)
            # elements after one that must consume input are not checked
            if not expr.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] + inner[-1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"

4043 

4044 

class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Run the base streamlining and recompute the flags that are derived
        from the alternatives. Returns ``self``.
        """
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative at ``loc`` and return the result of the
        longest match.

        Fatal exceptions take priority over ordinary parse failures; if
        nothing matches, the failure that got furthest is raised.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: find how far each alternative would get
        for expr in self.exprs:
            try:
                end_loc = expr.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = expr
                fatals.append(pfe)
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), expr.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, expr))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, doActions)

            longest = -1, None
            for expected_loc, candidate in candidates:
                if expected_loc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_loc, toks = candidate._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                    continue

                if actual_loc >= expected_loc:
                    return actual_loc, toks
                # didn't match as much as before
                if actual_loc > longest[0]:
                    longest = actual_loc, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # break the tie using the length of the element's string form
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if furthest_exc is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            if furthest_loc == loc:
                furthest_exc.msg = self.errmsg
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Append ``other`` in place (strings are wrapped with the literal
        string class); returns ``NotImplemented`` for unsupported operands.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning first when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is active and
        an alternative is an ``And`` that has not suppressed it.
        """
        diagnostic = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diagnostic not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diagnostic not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, listAllMatches)

4198 

4199 

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Run the base streamlining once and recompute the flags that are
        derived from the alternatives. Returns ``self``.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at ``loc``.

        A fatal exception is re-raised immediately; if every alternative
        fails, the failure that got furthest is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for expr in self.exprs:
            try:
                return expr._parse(instring, loc, doActions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = expr
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), expr.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Append ``other`` in place (strings are wrapped with the literal
        string class); returns ``NotImplemented`` for unsupported operands.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return f"{{{' | '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, warning first when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is active and
        an alternative is an ``And`` that has not suppressed it.
        """
        diagnostic = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diagnostic not in self.suppress_warnings_
            and any(
                isinstance(e, And) and diagnostic not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, listAllMatches)

4305 

4306 

class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        # the expression groups are built lazily on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Append ``other`` in place (strings are wrapped with the literal
        string class); returns ``NotImplemented`` for unsupported operands.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Run the base streamlining and recompute ``mayReturnEmpty``.
        Returns ``self``.
        """
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order they occur.

        A trial pass (``try_parse``) determines the order in which the
        expressions match; the expressions are then parsed for real in that
        order and their results are combined. Returns ``(loc, results)``.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            opt_inner = [e.expr for e in self.exprs if isinstance(e, Opt)]
            opt_empty = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt_inner + opt_empty
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        trial_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        multis = self.multioptionals[:]
        match_order = []

        failed = []
        fatals = []
        while True:
            attempt = pending_required + pending_optional + multis
            failed.clear()
            fatals.clear()
            for expr in attempt:
                try:
                    trial_loc = expr.try_parse(instring, trial_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = expr
                    fatals.append(pfe)
                    failed.append(expr)
                except ParseException:
                    failed.append(expr)
                else:
                    # record the Opt wrapper when the matched expr is its inner expr
                    match_order.append(self.opt1map.get(id(expr), expr))
                    if expr in pending_required:
                        pending_required.remove(expr)
                    elif expr in pending_optional:
                        pending_optional.remove(expr)
            # stop once a full pass makes no progress
            if len(failed) == len(attempt):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        total_results = ParseResults([])
        for expr in match_order:
            loc, results = expr._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return f"{{{' & '.join(str(e) for e in self.exprs)}}}"

4479 

4480 

4481class ParseElementEnhance(ParserElement): 

4482 """Abstract subclass of :class:`ParserElement`, for combining and 

4483 post-processing parsed tokens. 

4484 """ 

4485 

def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
    """Wrap ``expr``, converting a string argument into a parser element,
    and copy the wrapped expression's parsing flags onto this element.
    """
    super().__init__(savelist)
    if isinstance(expr, str_type):
        text = typing.cast(str, expr)
        literal_cls = self._literalStringClass
        if issubclass(literal_cls, Token):
            expr = literal_cls(text)  # type: ignore[call-arg]
        elif issubclass(type(self), literal_cls):
            expr = Literal(text)
        else:
            expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
    expr = typing.cast(ParserElement, expr)
    self.expr = expr
    if expr is None:
        return

    # mirror the wrapped expression's settings
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.set_whitespace_chars(
        expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
    )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)

4508 

4509 def recurse(self) -> List[ParserElement]: 

4510 return [self.expr] if self.expr is not None else [] 

4511 

4512 def parseImpl(self, instring, loc, doActions=True): 

4513 if self.expr is None: 

4514 raise ParseException(instring, loc, "No expression defined", self) 

4515 

4516 try: 

4517 return self.expr._parse(instring, loc, doActions, callPreParse=False) 

4518 except ParseBaseException as pbe: 

4519 if not isinstance(self, Forward) or self.customName is not None: 

4520 if self.errmsg: 

4521 pbe.msg = self.errmsg 

4522 raise 

4523 

4524 def leave_whitespace(self, recursive: bool = True) -> ParserElement: 

4525 super().leave_whitespace(recursive) 

4526 

4527 if recursive: 

4528 if self.expr is not None: 

4529 self.expr = self.expr.copy() 

4530 self.expr.leave_whitespace(recursive) 

4531 return self 

4532 

4533 def ignore_whitespace(self, recursive: bool = True) -> ParserElement: 

4534 super().ignore_whitespace(recursive) 

4535 

4536 if recursive: 

4537 if self.expr is not None: 

4538 self.expr = self.expr.copy() 

4539 self.expr.ignore_whitespace(recursive) 

4540 return self 

4541 

4542 def ignore(self, other) -> ParserElement: 

4543 if not isinstance(other, Suppress) or other not in self.ignoreExprs: 

4544 super().ignore(other) 

4545 if self.expr is not None: 

4546 self.expr.ignore(self.ignoreExprs[-1]) 

4547 

4548 return self 

4549 

4550 def streamline(self) -> ParserElement: 

4551 super().streamline() 

4552 if self.expr is not None: 

4553 self.expr.streamline() 

4554 return self 

4555 

4556 def _checkRecursion(self, parseElementList): 

4557 if self in parseElementList: 

4558 raise RecursiveGrammarException(parseElementList + [self]) 

4559 subRecCheckList = parseElementList[:] + [self] 

4560 if self.expr is not None: 

4561 self.expr._checkRecursion(subRecCheckList) 

4562 

4563 def validate(self, validateTrace=None) -> None: 

4564 warnings.warn( 

4565 "ParserElement.validate() is deprecated, and should not be used to check for left recursion", 

4566 DeprecationWarning, 

4567 stacklevel=2, 

4568 ) 

4569 if validateTrace is None: 

4570 validateTrace = [] 

4571 tmp = validateTrace[:] + [self] 

4572 if self.expr is not None: 

4573 self.expr.validate(tmp) 

4574 self._checkRecursion([]) 

4575 

4576 def _generateDefaultName(self) -> str: 

4577 return f"{type(self).__name__}:({self.expr})" 

4578 

4579 # Compatibility synonyms 

4580 # fmt: off 

4581 leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace) 

4582 ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace) 

4583 # fmt: on 

4584 

4585 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Zero-width check that the current column equals ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, loc, toks: col(loc, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width check that the current column is greater than ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, loc, toks: col(loc, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        """
        Parameters:

        - ``expr`` - expression matched on each line of the block
        - ``recursive`` - if ``True``, each line may be followed by a more
          deeply indented nested block of the same expression
        - ``grouped`` - if ``True``, the block's tokens are wrapped in a
          :class:`Group`
        """
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column a line must return to in order to close this block
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        """Build a parser for the block anchored at the current column and
        run it. Raises whatever ``self.expr`` raises if it does not match
        at the first non-whitespace position.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=doActions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        line_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper_level + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        body = OneOrMore(line_expr)

        closing_undent = self._Indent(self.parent_anchor) | StringEnd()

        if self._grouped:
            body = Group(body)
        return (body + Opt(closing_undent)).parseImpl(
            instring, anchor_loc, doActions
        )

4648 

4649 

class AtStringStart(ParseElementEnhance):
    """Matches the wrapped expression only when the parse position is the
    very first character of the input string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # override whatever pre-parse setting was copied from the wrapped expression
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression at location 0; anywhere else,
        raise :class:`ParseException`.
        """
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")

4669 

4670 

class AtLineStart(ParseElementEnhance):
    r"""Matches the wrapped expression only when the parse position is in
    column 1, i.e. at the beginning of a line within the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # override whatever pre-parse setting was copied from the wrapped expression
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression when ``loc`` is in column 1;
        otherwise raise :class:`ParseException`.
        """
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")

4702 

4703 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.

    ``FollowedBy`` verifies that the wrapped expression matches at the
    current position but does *not* advance the parse position. The
    returned token list is always empty; results names defined inside the
    lookahead expression *are* still returned for access by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the lookahead expression at ``loc`` and return ``loc``
        unchanged together with the emptied results object.
        """
        # Parsing for real and then emptying the token list (instead of
        # returning a fresh empty list) preserves any named results that
        # were defined in the FollowedBy expression.
        _end_loc, lookahead = self.expr._parse(instring, loc, doActions=doActions)
        del lookahead[:]

        return loc, lookahead

4738 

4739 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.

    ``PrecededBy`` verifies that the wrapped expression matches just before
    the current position, without advancing the parse position. It always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False

        # For the expression kinds below the lookbehind distance is taken
        # from the expression itself and replaces any ``retreat`` argument.
        width_is_known = True
        if isinstance(expr, str_type):
            retreat = len(typing.cast(str, expr))
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            width_is_known = False
        self.exact = width_is_known
        self.retreat = retreat

        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the token list in place
        self.parseAction.append(lambda s, loc, toks: toks.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Check for a lookbehind match and return ``loc`` unchanged with
        the lookbehind's results; raise a parse exception on no match.
        """
        if self.exact:
            # fixed distance: a single attempt, ``retreat`` characters back
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            lookbehind_start = loc - self.retreat
            _, ret = self.expr._parse(instring, lookbehind_start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        failure = ParseException(instring, loc, self.errmsg)

        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                # remember the most recent failure in case nothing matches
                failure = pbe
            else:
                return loc, ret

        raise failure

4821 

4822 

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression and return ``[start, tokens, end]``
        with the three results names attached.
        """
        end, matched = self.expr._parse(instring, loc, doActions, callPreParse=False)
        located = ParseResults([loc, matched, end])
        for key, value in (
            ("locn_start", loc),
            ("value", matched),
            ("locn_end", end),
        ):
            located[key] = value

        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]

4863 

4864 

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.

    ``NotAny`` verifies that the wrapped expression does *not* match at the
    current position. It does *not* advance the parsing position and does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # Set the flag directly rather than calling self.leave_whitespace():
        # that call would propagate the setting to the contained expression.
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, {}".format(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc, [])`` unless the wrapped expression can parse at
        ``loc``, in which case raise :class:`ParseException`.
        """
        unwanted_present = self.expr.can_parse_next(
            instring, loc, do_actions=doActions
        )
        if not unwanted_present:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression in braces, prefixed with ``~``."""
        return "~{" + str(self.expr) + "}"

4906 

4907 

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching of one expression, with
    an optional "stop on" sentinel that ends the repetition.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        """``stop_on`` and ``stopOn`` are two spellings of the same option;
        ``stopOn`` takes precedence when both are truthy.
        """
        super().__init__(expr)
        sentinel = stopOn or stop_on
        self.saveAsList = True
        # the stopOn() method converts a string sentinel to a parser element
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel expression. Returns ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            # stored negated: parsing continues only while the sentinel is absent
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the expression once (required), then as many more times as
        possible, returning the final location and the accumulated tokens.
        """
        parse_item = self.expr._parse
        if self.not_ender is not None:
            refuse_sentinel = self.not_ender.try_parse
        else:
            refuse_sentinel = None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if refuse_sentinel is not None:
            refuse_sentinel(instring, loc)
        loc, tokens = parse_item(instring, loc, doActions)

        skip_ignorables = self._skipIgnorables if self.ignoreExprs else None
        try:
            while True:
                if refuse_sentinel is not None:
                    refuse_sentinel(instring, loc)
                if skip_ignorables is not None:
                    item_start = skip_ignorables(instring, loc)
                else:
                    item_start = loc
                loc, more_tokens = parse_item(instring, item_start, doActions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition (or a sentinel hit) ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, first warning (when that diagnostic is
        enabled and not suppressed) if a contained expression already
        carries a results name of its own.
        """
        diag_flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag_flag not in self.suppress_warnings_
        ):
            candidates = [self.expr] + self.expr.recurse()
            offender = next(
                (
                    e
                    for e in candidates
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and diag_flag not in e.suppress_warnings_
                ),
                None,
            )
            if offender is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {offender.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, listAllMatches)

4982 

4983 

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        """Default name: the repeated expression in braces, followed by ``...``."""
        return "{" + str(self.expr) + "}..."

5014 

5015 

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse as the base class does, but turn a failure to match even
        once into an empty result at the original location.
        """
        try:
            outcome = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        """Default name: the repeated expression in brackets, followed by ``...``."""
        return "[" + str(self.expr) + "]..."

5048 

5049 

class DelimitedList(ParseElementEnhance):
    """Expression matching a list of items separated by a delimiter.

    See :meth:`__init__` for the parameters and an example.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        ``min`` and ``max`` bound the number of list items; ``min`` defaults
        to 1 and ``max`` to unlimited.

        Raises ``ValueError`` if ``min`` is less than 1, or if ``max`` is
        less than the effective minimum.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        # Compare max against the effective minimum (1 when min is omitted).
        # Previously the check was skipped when min was None, so max=0 was
        # only rejected later, by the repetition operator, with a message
        # unrelated to the arguments of this constructor.
        effective_min = min or 1
        if max is not None and max < effective_min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.delim = delim
        self.combine = combine
        if not combine:
            # delimiters are dropped from the results unless combining
            self.delim = Suppress(delim)
        self.min = effective_min
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # first item, then (delimiter + item) repeated min-1 .. max-1 times
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Default name of the form ``item [delim item]...``."""
        content_expr = self.content.streamline()
        return f"{content_expr} [{self.raw_delim} {content_expr}]..."

5113 

5114 

class _NullToken:
    """Placeholder object that is falsy and whose string form is empty."""

    def __bool__(self) -> bool:
        """Instances are always falsy."""
        return False

    def __str__(self) -> str:
        """Instances always render as the empty string."""
        return ""

5121 

5122 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match once, or not at all
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression; if it fails, return the default value
        (if one was supplied) or an empty token list, at the original location.
        """
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                # no default configured: contribute nothing
                tokens = []
            elif inner.resultsName:
                # expose the default under the wrapped expression's results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Default name: the wrapped expression's name inside ``[...]``."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "[" + inner + "]"

5195 

5196 

# ``Optional`` is bound to the very same class object as ``Opt``
# (presumably the older spelling, kept so existing code keeps working).
Optional = Opt

5198 

5199 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        """``fail_on`` and ``failOn`` are two spellings of the same option;
        ``failOn`` takes precedence when both are truthy.
        """
        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)
        # zero-width helper whose only job is to carry the ignore expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's ignore
        expressions plus the ``ignore`` argument given to the constructor.
        """
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        """Register ``expr`` as ignorable and refresh the internal ignorer.

        Returns ``self``, like the ``ignore`` method this overrides, so the
        call can be chained or assigned.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the new location and a :class:`ParseResults` holding the
        skipped text (plus the target's tokens when ``include`` was set).
        Raises :class:`ParseException` if the end of the input is reached
        without the target matching.
        """
        startloc = loc
        instrlen = len(instring)
        target_parse = self.expr._parse
        fail_on_matches = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if fail_on_matches is not None:
                # break if failOn expression matches
                # NOTE(review): this `break` bypasses the while/else failure
                # branch below, so the scan ends here without raising -
                # confirm that is the intended handling of fail_on.
                if fail_on_matches(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: parse actions are deliberately not run here
                target_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honouring doActions
            loc, mat = target_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5350 

5351 

5352class Forward(ParseElementEnhance): 

5353 """ 

5354 Forward declaration of an expression to be defined later - 

5355 used for recursive grammars, such as algebraic infix notation. 

5356 When the expression is known, it is assigned to the ``Forward`` 

5357 variable using the ``'<<'`` operator. 

5358 

5359 Note: take care when assigning to ``Forward`` not to overlook 

5360 precedence of operators. 

5361 

5362 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that:: 

5363 

5364 fwd_expr << a | b | c 

5365 

5366 will actually be evaluated as:: 

5367 

5368 (fwd_expr << a) | b | c 

5369 

5370 thereby leaving b and c out as parseable alternatives. It is recommended that you 

5371 explicitly group the values inserted into the ``Forward``:: 

5372 

5373 fwd_expr << (a | b | c) 

5374 

5375 Converting to use the ``'<<='`` operator instead will avoid this problem. 

5376 

5377 See :class:`ParseResults.pprint` for an example of a recursive 

5378 parser created using ``Forward``. 

5379 """ 

5380 

def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
    """Create the forward declaration, optionally with an initial expression.

    The first entry of a two-frame stack extract is stored as
    ``caller_frame`` before the base initializer runs, and ``lshift_line``
    starts out as ``None``.
    """
    creation_stack = traceback.extract_stack(limit=2)
    self.caller_frame = creation_stack[0]
    super().__init__(other, savelist=False)  # type: ignore[arg-type]
    self.lshift_line = None

5385 

def __lshift__(self, other) -> "Forward":
    """Attach ``other`` as the expression this ``Forward`` stands for.

    A string is converted with ``self._literalStringClass``. Anything that
    is not a ``ParserElement`` after that conversion yields
    ``NotImplemented``. On success the parsing characteristics of
    ``other`` are copied onto this element and ``self`` is returned.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)

    if not isinstance(other, ParserElement):
        return NotImplemented

    # Discard the creation-site frame only once the operand is known to be
    # valid. Deleting it before the check left a rejected '<<' with
    # ``expr`` still None but no ``caller_frame``, which ``__del__`` reads
    # when it reports a Forward that never received an expression.
    if hasattr(self, "caller_frame"):
        del self.caller_frame

    self.expr = other
    self.streamlined = other.streamlined
    self.mayIndexError = self.expr.mayIndexError
    self.mayReturnEmpty = self.expr.mayReturnEmpty
    self.set_whitespace_chars(
        self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
    )
    self.skipWhitespace = self.expr.skipWhitespace
    self.saveAsList = self.expr.saveAsList
    self.ignoreExprs.extend(self.expr.ignoreExprs)
    # remember the calling line; __or__ compares against it to detect
    # '<<' and '|' being used together on one line
    self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
    return self

5407 

5408 def __ilshift__(self, other) -> "Forward": 

5409 if not isinstance(other, ParserElement): 

5410 return NotImplemented 

5411 

5412 return self << other 

5413 

5414 def __or__(self, other) -> "ParserElement": 

5415 caller_line = traceback.extract_stack(limit=2)[-2] 

5416 if ( 

5417 __diag__.warn_on_match_first_with_lshift_operator 

5418 and caller_line == self.lshift_line 

5419 and Diagnostics.warn_on_match_first_with_lshift_operator 

5420 not in self.suppress_warnings_ 

5421 ): 

5422 warnings.warn( 

5423 "using '<<' operator with '|' is probably an error, use '<<='", 

5424 stacklevel=2, 

5425 ) 

5426 ret = super().__or__(other) 

5427 return ret 

5428 

5429 def __del__(self): 

5430 # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<' 

5431 if ( 

5432 self.expr is None 

5433 and __diag__.warn_on_assignment_to_Forward 

5434 and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_ 

5435 ): 

5436 warnings.warn_explicit( 

5437 "Forward defined here but no expression attached later using '<<=' or '<<'", 

5438 UserWarning, 

5439 filename=self.caller_frame.filename, 

5440 lineno=self.caller_frame.lineno, 

5441 ) 

5442 

5443 def parseImpl(self, instring, loc, doActions=True): 

5444 if ( 

5445 self.expr is None 

5446 and __diag__.warn_on_parse_using_empty_Forward 

5447 and Diagnostics.warn_on_parse_using_empty_Forward 

5448 not in self.suppress_warnings_ 

5449 ): 

5450 # walk stack until parse_string, scan_string, search_string, or transform_string is found 

5451 parse_fns = ( 

5452 "parse_string", 

5453 "scan_string", 

5454 "search_string", 

5455 "transform_string", 

5456 ) 

5457 tb = traceback.extract_stack(limit=200) 

5458 for i, frm in enumerate(reversed(tb), start=1): 

5459 if frm.name in parse_fns: 

5460 stacklevel = i + 1 

5461 break 

5462 else: 

5463 stacklevel = 2 

5464 warnings.warn( 

5465 "Forward expression was never assigned a value, will not parse any input", 

5466 stacklevel=stacklevel, 

5467 ) 

5468 if not ParserElement._left_recursion_enabled: 

5469 return super().parseImpl(instring, loc, doActions) 

5470 # ## Bounded Recursion algorithm ## 

5471 # Recursion only needs to be processed at ``Forward`` elements, since they are 

5472 # the only ones that can actually refer to themselves. The general idea is 

5473 # to handle recursion stepwise: We start at no recursion, then recurse once, 

5474 # recurse twice, ..., until more recursion offers no benefit (we hit the bound). 

5475 # 

5476 # The "trick" here is that each ``Forward`` gets evaluated in two contexts 

5477 # - to *match* a specific recursion level, and 

5478 # - to *search* the bounded recursion level 

5479 # and the two run concurrently. The *search* must *match* each recursion level 

5480 # to find the best possible match. This is handled by a memo table, which 

5481 # provides the previous match to the next level match attempt. 

5482 # 

5483 # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al. 

5484 # 

5485 # There is a complication since we not only *parse* but also *transform* via 

5486 # actions: We do not want to run the actions too often while expanding. Thus, 

5487 # we expand using `doActions=False` and only run `doActions=True` if the next 

5488 # recursion level is acceptable. 

5489 with ParserElement.recursion_lock: 

5490 memo = ParserElement.recursion_memos 

5491 try: 

5492 # we are parsing at a specific recursion expansion - use it as-is 

5493 prev_loc, prev_result = memo[loc, self, doActions] 

5494 if isinstance(prev_result, Exception): 

5495 raise prev_result 

5496 return prev_loc, prev_result.copy() 

5497 except KeyError: 

5498 act_key = (loc, self, True) 

5499 peek_key = (loc, self, False) 

5500 # we are searching for the best recursion expansion - keep on improving 

5501 # both `doActions` cases must be tracked separately here! 

5502 prev_loc, prev_peek = memo[peek_key] = ( 

5503 loc - 1, 

5504 ParseException( 

5505 instring, loc, "Forward recursion without base case", self 

5506 ), 

5507 ) 

5508 if doActions: 

5509 memo[act_key] = memo[peek_key] 

5510 while True: 

5511 try: 

5512 new_loc, new_peek = super().parseImpl(instring, loc, False) 

5513 except ParseException: 

5514 # we failed before getting any match – do not hide the error 

5515 if isinstance(prev_peek, Exception): 

5516 raise 

5517 new_loc, new_peek = prev_loc, prev_peek 

5518 # the match did not get better: we are done 

5519 if new_loc <= prev_loc: 

5520 if doActions: 

5521 # replace the match for doActions=False as well, 

5522 # in case the action did backtrack 

5523 prev_loc, prev_result = memo[peek_key] = memo[act_key] 

5524 del memo[peek_key], memo[act_key] 

5525 return prev_loc, prev_result.copy() 

5526 del memo[peek_key] 

5527 return prev_loc, prev_peek.copy() 

5528 # the match did get better: see if we can improve further 

5529 if doActions: 

5530 try: 

5531 memo[act_key] = super().parseImpl(instring, loc, True) 

5532 except ParseException as e: 

5533 memo[peek_key] = memo[act_key] = (new_loc, e) 

5534 raise 

5535 prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek 

5536 

5537 def leave_whitespace(self, recursive: bool = True) -> ParserElement: 

5538 self.skipWhitespace = False 

5539 return self 

5540 

5541 def ignore_whitespace(self, recursive: bool = True) -> ParserElement: 

5542 self.skipWhitespace = True 

5543 return self 

5544 

5545 def streamline(self) -> ParserElement: 

5546 if not self.streamlined: 

5547 self.streamlined = True 

5548 if self.expr is not None: 

5549 self.expr.streamline() 

5550 return self 

5551 

5552 def validate(self, validateTrace=None) -> None: 

5553 warnings.warn( 

5554 "ParserElement.validate() is deprecated, and should not be used to check for left recursion", 

5555 DeprecationWarning, 

5556 stacklevel=2, 

5557 ) 

5558 if validateTrace is None: 

5559 validateTrace = [] 

5560 

5561 if self not in validateTrace: 

5562 tmp = validateTrace[:] + [self] 

5563 if self.expr is not None: 

5564 self.expr.validate(tmp) 

5565 self._checkRecursion([]) 

5566 

5567 def _generateDefaultName(self) -> str: 

5568 # Avoid infinite recursion by setting a temporary _defaultName 

5569 self._defaultName = ": ..." 

5570 

5571 # Use the string representation of main expression. 

5572 retString = "..." 

5573 try: 

5574 if self.expr is not None: 

5575 retString = str(self.expr)[:1000] 

5576 else: 

5577 retString = "None" 

5578 finally: 

5579 return f"{type(self).__name__}: {retString}" 

5580 

5581 def copy(self) -> ParserElement: 

5582 if self.expr is not None: 

5583 return super().copy() 

5584 else: 

5585 ret = Forward() 

5586 ret <<= self 

5587 return ret 

5588 

5589 def _setResultsName(self, name, list_all_matches=False): 

5590 # fmt: off 

5591 if ( 

5592 __diag__.warn_name_set_on_empty_Forward 

5593 and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_ 

5594 and self.expr is None 

5595 ): 

5596 warning = ( 

5597 "warn_name_set_on_empty_Forward:" 

5598 f" setting results name {name!r} on {type(self).__name__} expression" 

5599 " that has no contained expression" 

5600 ) 

5601 warnings.warn(warning, stacklevel=3) 

5602 # fmt: on 

5603 

5604 return super()._setResultsName(name, list_all_matches) 

5605 

5606 # Compatibility synonyms 

5607 # fmt: off 

5608 leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace) 

5609 ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace) 

5610 # fmt: on 

5611 

5612 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but not forwarded to the parent constructor;
        # ``saveAsList`` is always reset to False here.
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

5621 

5622 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is given to the constructor, the pieces
    matched by the contained expression must also be contiguous in the
    input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the legacy keyword wins whenever it is supplied
        separator = join_string if joinString is None else joinString
        if adjacent:
            # no whitespace skipping inside the combined expression ...
            self.leave_whitespace()
        # ... but leading whitespace before the Combine itself is still skipped
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = separator
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression and return ``self``.

        In adjacent mode the base ``ParserElement.ignore`` is invoked
        directly; otherwise the inherited implementation is used.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their single joined string."""
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

5678 

5679 

class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    handy for keeping the tokens of :class:`ZeroOrMore` and
    :class:`OneOrMore` expressions together.

    When the optional ``aslist`` argument is True, the parsed tokens are
    returned as a Python list rather than a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the tokens in a list, or convert them to a plain list."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.asList()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)

5715 

5716 

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as an item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register each token group under the key given by its first item.

        Empty groups are skipped.  A group holding only a key maps to
        ``""``; a key plus one plain value maps to that value; anything
        longer maps to the remainder of the group (unwrapped when the
        remainder is a single item without named results).
        """
        for position, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if len(entry) == 1:
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                try:
                    rest = entry.copy()  # ParseResults(i)
                except Exception:
                    exc = TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    )
                    raise exc from None

                del rest[0]

                keep_whole = len(rest) != 1 or (
                    isinstance(rest, ParseResults) and rest.haskeys()
                )
                value = rest if keep_whole else rest[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        if self.resultsName:
            return [result]
        return result

5802 

5803 

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` stands for "skip to whatever expression is added next"
        if expr is Ellipsis:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> "ParserElement":
        """Build ``self + other``, resolving a pending skip into a suppressed ``SkipTo``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        """Build ``self - other``, resolving a pending skip into a suppressed ``SkipTo``."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Throw away whatever was matched."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressed - return ``self`` unchanged."""
        return self

5857 

5858 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each time the wrapped parse action runs, a line of the form
    ``">>entering name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written to ``sys.stderr`` first.  Once it finishes, a ``"<<leaving name"``
    line follows, showing either the returned value or the exception that
    was raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            label = f"{type(paArgs[0]).__name__}.{label}"
        log = sys.stderr.write
        log(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            log(f"<<leaving {label} (exception: {exc})\n")
            raise
        else:
            log(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z

5902 

5903 

# convenience constants for positional expressions
# Each one is a single module-level instance of the corresponding class,
# created with no arguments and given a display name equal to its variable name.
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

5910 

# Private grammar used by srange() to parse a regex-style "[...]" character set.

# A backslash followed by one of the listed punctuation characters (or a
# space); the parse action keeps only the character after the backslash.
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# "\x##" or "\0x##" (x in either case) followed by hex digits.  The digits are
# everything after the first "x"; taking them with partition() - instead of
# lstrip(r"\0x"), which also eats leading zero digits and ignores "X" - makes
# "\x00" and "\X41" convert correctly.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# "\0" followed by octal digits; everything after the backslash is read as base 8.
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# One set member: any of the escapes above, or a single character other
# than a backslash or "]".
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped as [first, last] with the dash suppressed.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# The whole bracket expression: "[", optional "^" (named "negate"), one or
# more ranges/single characters (named "body"), "]".
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

5930 

5931 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction.  The syntax is borrowed from regexp ``'[]'`` string
    range definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s; the result is the
    expanded character set joined into a single string.  If the input
    cannot be processed, an empty string is returned.  Inside the []'s
    the following may appear:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def _expanded(p):
        # plain characters pass through; a grouped [first, last] pair is
        # expanded into every character between the two, inclusive
        if not isinstance(p, ParseResults):
            return p
        first, last = ord(p[0]), ord(p[1])
        return "".join(chr(c) for c in range(first, last + 1))

    try:
        pieces = []
        for part in _reBracketExpr.parse_string(s).body:
            pieces.append(_expanded(part))
        return "".join(pieces)
    except Exception:
        return ""

5967 

5968 

def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying ``func`` to every
    element of a :class:`ParseResults` list.  Any extra ``args`` are
    passed to ``func`` after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (or after its class
    when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # callables without a __name__ (e.g. functools.partial objects) are
    # labelled with the name of their class instead
    class_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", class_name)

    return pa

6013 

6014 

def autoname_elements() -> None:
    """
    Utility for mass-naming parser elements, e.g. before generating a
    railroad diagram with named subdiagrams.

    Every local variable of the *calling* frame that is a
    :class:`ParserElement` without a custom name gets named after the
    variable it is bound to.
    """
    caller = sys._getframe().f_back
    if caller is not None:
        caller = typing.cast(types.FrameType, caller)
        for name, var in caller.f_locals.items():
            if not isinstance(var, ParserElement):
                continue
            if var.customName:
                # leave explicitly named elements alone
                continue
            var.set_name(name)

6027 

6028 

# Single-line quoted strings.  In each pattern the regex matches the opening
# quote and the body; the closing quote is a separate literal, and Combine
# joins the two pieces into one token.  The body may contain:
#   - any character other than the quote, newline, carriage return or backslash
#   - a doubled quote character ("" or '')
#   - a backslash followed by any character other than "x", or by "x" and
#     one or more hex digits
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above; the double-quoted alternative is listed
# first in the "|" expression.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

6045 

# Python-style string literals: triple-quoted (either quote character) or
# single-line quoted, combined with "^" so every alternative is tried.
#
# The triple-quoted bodies accept any character other than the quote or a
# backslash, one or two quote characters not completing a closing triple, or
# a backslash escape.  re.DOTALL is required so that the "\\." escape also
# matches a backslash followed by a newline (a line continuation inside the
# string); re.MULTILINE alone only changes "^"/"$", which these patterns do
# not use.  It is kept so the flags remain a superset of the previous value.
#
# The single-line forms differ from quoted_string in accepting a
# backslash-escaped quote (\" or \') rather than a doubled quote.
python_quoted_string = Combine(
    (
        Regex(
            r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*',
            flags=re.MULTILINE | re.DOTALL,
        )
        + '"""'
    ).set_name("multiline double quoted string")
    ^ (
        Regex(
            r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*",
            flags=re.MULTILINE | re.DOTALL,
        )
        + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

6060 

# A quoted string (copied, so the shared quoted_string object is not reused
# directly) prefixed with a literal "u".
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# Characters in the ranges 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF
# (i.e. 0xC0-0xFF without 0xD7 and 0xF7).
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# Characters in the range 0xA1-0xBF plus 0xD7 and 0xF7.
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (collects every ParserElement instance bound at module level at this point)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6072 

# backward compatibility names
# The camelCase expression names are plain aliases bound to the same objects
# as their snake_case counterparts; the function aliases are produced by
# replaced_by_pep8() from the old name and the new function.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on

6087# fmt: on