Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 44%

2602 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:51 +0000

1# 

2# core.py 

3# 

4 

5from collections import deque 

6import os 

7import typing 

8from typing import ( 

9 Any, 

10 Callable, 

11 Generator, 

12 List, 

13 NamedTuple, 

14 Sequence, 

15 Set, 

16 TextIO, 

17 Tuple, 

18 Union, 

19 cast, 

20) 

21from abc import ABC, abstractmethod 

22from enum import Enum 

23import string 

24import copy 

25import warnings 

26import re 

27import sys 

28from collections.abc import Iterable 

29import traceback 

30import types 

31from operator import itemgetter 

32from functools import wraps 

33from threading import RLock 

34from pathlib import Path 

35 

36from .util import ( 

37 _FifoCache, 

38 _UnboundedCache, 

39 __config_flags, 

40 _collapse_string_to_ranges, 

41 _escape_regex_range_chars, 

42 _bslash, 

43 _flatten, 

44 LRUMemo as _LRUMemo, 

45 UnboundedMemo as _UnboundedMemo, 

46 replaced_by_pep8, 

47) 

48from .exceptions import * 

49from .actions import * 

50from .results import ParseResults, _ParseResultsWithOffset 

51from .unicode import pyparsing_unicode 

52 

53_MAX_INT = sys.maxsize 

54str_type: Tuple[type, ...] = (str, bytes) 

55 

56# 

57# Copyright (c) 2003-2022 Paul T. McGuire 

58# 

59# Permission is hereby granted, free of charge, to any person obtaining 

60# a copy of this software and associated documentation files (the 

61# "Software"), to deal in the Software without restriction, including 

62# without limitation the rights to use, copy, modify, merge, publish, 

63# distribute, sublicense, and/or sell copies of the Software, and to 

64# permit persons to whom the Software is furnished to do so, subject to 

65# the following conditions: 

66# 

67# The above copyright notice and this permission notice shall be 

68# included in all copies or substantial portions of the Software. 

69# 

70# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

71# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

72# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

73# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

74# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

75# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

76# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

77# 

78 

79 

if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """
        Minimal stand-in for :class:`functools.cached_property` on Python < 3.8.

        The wrapped function is evaluated on first attribute access and its
        result is stored in the instance ``__dict__`` under the function's name,
        so later lookups find the stored value without calling this descriptor.
        """

        def __init__(self, func):
            self._func = func
            self.__doc__ = getattr(func, "__doc__", None)

        def __get__(self, instance, owner=None):
            # accessed on the class itself (e.g. by introspection tools): there is
            # no instance to compute for, so hand back the descriptor object
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret

91 

92 

class __compat__(__config_flags):
    """
    Cross-version compatibility switches for pyparsing features scheduled for a
    future release. Setting one of these flags to True enables the feature in an
    earlier version, for compatibility development and testing.

    - ``collect_all_And_tokens`` - enables the fix for Issue #63 (erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or`
      or :class:`MatchFirst`); kept for compatibility, but setting it to ``False``
      no longer restores pre-2.3.1 behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # Snapshot of the public names bound in the class body so far; this line has
    # to stay below the flag definitions because it reads locals() at this point.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]

114 

115 

class __diag__(__config_flags):
    """
    Holder for the on/off state of every diagnostic flag (all start disabled).
    The module-level helpers ``enable_diag`` / ``disable_diag`` toggle individual
    flags through the ``enable`` / ``disable`` methods of this class.
    """

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # Snapshot of the public names bound above; must stay below the flag
    # definitions because it reads locals() at this point of the class body.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag whose name starts with ``warn``."""
        for warning_flag in cls._warning_names:
            cls.enable(warning_flag)

136 

137 

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - (not described in the original list;
      presumably warns about ``'<<'`` being combined with a :class:`MatchFirst` ``'|'``
      expression - TODO confirm against the code that reads this flag)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.

    Only the member *name* is used by ``enable_diag`` / ``disable_diag``; the
    integer values simply number the members.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

170 

171 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch on one global pyparsing diagnostic flag.

    ``diag_enum`` is a :class:`Diagnostics` member; its name selects the flag.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

177 

178 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch off one global pyparsing diagnostic flag.

    ``diag_enum`` is a :class:`Diagnostics` member; its name selects the flag.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

184 

185 

def enable_all_warnings() -> None:
    """
    Switch on every global pyparsing diagnostic warning (see :class:`Diagnostics`)
    by delegating to ``__diag__.enable_all_warnings``.
    """
    diagnostics_registry = __diag__
    diagnostics_registry.enable_all_warnings()

191 

192 

193# hide abstract class 

194del __config_flags 

195 

196 

def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether all pyparsing diagnostic warnings should be switched on.

    The starting answer is the truthiness of ``warn_env_var``. Each entry of
    ``cmd_line_warn_options`` (colon-separated ``action:message:category:module:line``
    fields) may then override it; later entries win over earlier ones.
    """
    enabled = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with empty fields so a short spec such as "d" still yields every field
        fields = (option + "::::").split(":")
        action, message, category, module = fields[:4]
        if action.lower().startswith("i"):
            # an "ignore"-style rule that is unscoped or aimed at pyparsing turns warnings off
            if module in ("pyparsing", ""):
                enabled = False
        elif module == "pyparsing" or not (message or category or module):
            # any other rule that is fully unqualified or aimed at pyparsing turns them on
            enabled = True
    return enabled

212 

213 

# Import-time side effect: enable every diagnostic warning when the interpreter's
# warning options (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS environment
# variable ask for it, as decided by _should_enable_warnings().
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

218 

219 

# set of single-argument builtins that can be used as parse actions;
# _trim_arity() special-cases these and calls them with just the tokens argument
_single_arg_builtins = {
    sum,
    len,
    sorted,
    reversed,
    list,
    tuple,
    set,
    any,
    all,
    min,
    max,
}

234 

# type object of generator instances
_generatorType = types.GeneratorType
# (int, anything) pair; the parseImpl methods visible in this file return (loc, tokens)
ParseImplReturnType = Tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]
# a parse action may accept (), (toks), (loc, toks) or (s, loc, toks)
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# same call shapes as ParseAction, but returning a bool verdict
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: fn(s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug callbacks: the trailing bool is the cache_hit flag
# start:     fn(instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# success:   fn(instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# exception: fn(instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

256 

257 

# Character-class strings exported for building expressions.
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
# every printable ASCII character that is not whitespace
printables = "".join(ch for ch in string.printable if ch not in string.whitespace)

265 

266_trim_arity_call_line: traceback.StackSummary = None # type: ignore[assignment] 

267 

268 

def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is always called with the full argument list and
    forwards ``args[limit:]`` to ``func``, where ``limit`` starts at 0 and is
    increased (up to ``max_limit``) each time the call itself fails with a
    ``TypeError``. Once a call succeeds the arity is considered found and any
    later ``TypeError`` is re-raised unchanged.

    Members of ``_single_arg_builtins`` are short-circuited: they are wrapped
    in a lambda that passes only the tokens argument.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # compare the (filename, lineno) of the last of the first two
                    # traceback frames with the synthesized location of the call
                    # to func above; they match when that frame is the call line
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        # retry with one fewer leading argument, until max_limit is reached
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

324 

325 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate returning ``True`` or ``False`` so that it can be used as a
    parse action. Useful where a parse action is required and
    :class:`ParserElement.add_condition` cannot be used (such as when adding a
    condition to an operator level in :class:`infix_notation`).

    The returned parse action raises when the predicate's result is falsy and
    otherwise returns ``None``.

    Optional keyword arguments:

    - ``message`` - custom message for the raised exception; when ``None``,
      ``"failed user-defined condition"`` is used
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing
      immediately; otherwise raise :class:`ParseException`
    """
    failure_message = "failed user-defined condition" if message is None else message
    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException
    # normalise the predicate so it can always be called as fn(s, l, t)
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        passed = bool(fn(s, l, t))
        if not passed:
            raise failure_type(s, l, failure_message)

    return pa

352 

353 

def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Default "about to try a match" debug callback.

    Prints three lines: a header naming ``expr`` and the location (with line
    and column numbers), the source line at ``loc``, and a caret under the
    column. A leading ``*`` marks a cache hit.
    """
    marker = "*" if cache_hit else ""
    header = f"{marker}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
    source_line = f" {line(loc, instring)}\n"
    caret_line = f" {' ' * (col(loc, instring) - 1)}^"
    print(header + source_line + caret_line)

365 

366 

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default "match succeeded" debug callback: prints ``expr`` and the matched
    tokens as a list, prefixed with ``*`` on a cache hit. ``instring``,
    ``startloc`` and ``endloc`` are accepted but not used.
    """
    marker = "*" if cache_hit else ""
    report = f"{marker}Matched {expr} -> {toks.as_list()}"
    print(report)

377 

378 

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: "ParserElement",
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default "match failed" debug callback: prints ``expr`` together with the
    type name and text of ``exc``, prefixed with ``*`` on a cache hit.
    ``instring`` and ``loc`` are accepted but not used.
    """
    marker = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print(f"{marker}Match {expr} failed, {exc_name} raised: {exc}")

388 

389 

def null_debug_action(*args):
    """No-op debug action: accepts any positional arguments and does nothing,
    so that debugging output is suppressed during parsing."""
    return None

392 

393 

394class ParserElement(ABC): 

395 """Abstract base level parser element class.""" 

396 

397 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

398 verbose_stacktrace: bool = False 

399 _literalStringClass: type = None # type: ignore[assignment] 

400 

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of characters skipped as whitespace.

    The new value is stored in ``ParserElement.DEFAULT_WHITE_CHARS`` and is also
    applied to each expression in ``_builtin_exprs`` whose
    ``copyDefaultWhiteChars`` flag is set.

    Example::

        # with the default whitespace chars, newlines are skipped
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # refresh the expressions defined in this module that follow the default
    for builtin_expr in _builtin_exprs:
        if not builtin_expr.copyDefaultWhiteChars:
            continue
        builtin_expr.whiteChars = set(chars)

421 

@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class used to wrap bare string literals that appear inside a
    parser expression. The class is stored in ``ParserElement._literalStringClass``.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    setattr(ParserElement, "_literalStringClass", cls)

443 

@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Lazily build one ``cls(obj, **class_kwargs)`` per item of ``seq``.

    Example::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    """
    for obj in seq:
        yield cls(obj, **class_kwargs)

455 

class DebugActions(NamedTuple):
    """Triple of optional debug callbacks stored on each parser element."""

    # called before a match is attempted
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match
    debug_match: typing.Optional[DebugSuccessAction]
    # called when a match attempt or a parse action raises
    debug_fail: typing.Optional[DebugExceptionAction]

460 

def __init__(self, savelist: bool = False):
    """
    Set up the state shared by all parser elements.

    ``savelist`` is stored as ``saveAsList``; every other attribute starts
    from a fixed default.
    """
    # --- naming ---
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]

    # --- results handling ---
    self.saveAsList = savelist
    # mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True

    # --- actions ---
    self.parseAction: List[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    self.callDuringTry = False

    # --- whitespace and ignorable content ---
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs: List["ParserElement"] = []
    # avoid redundant calls to preParse
    self.callPreparse = True

    # --- parsing behaviour ---
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""
    self.streamlined = False

    # --- debugging and diagnostics ---
    self.debug = False
    # custom debug actions
    self.debugActions = self.DebugActions(None, None, None)
    self.suppress_warnings_: List[Diagnostics] = []

488 

def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
    """
    Record ``warning_type`` in this expression's list of suppressed
    diagnostics and return the expression itself.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parse_string("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self

504 

def visit_all(self):
    """Generator over this expression and all of its sub-expressions, in
    breadth-first order, each yielded once. Typically just for internal use.
    """
    pending = deque([self])
    visited = set()
    while pending:
        node = pending.popleft()
        # recursive grammars can reach the same expression again; skip repeats
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node

521 

def copy(self) -> "ParserElement":
    """
    Return a shallow copy of this :class:`ParserElement` with its own
    ``parseAction`` and ``ignoreExprs`` lists. Useful for attaching different
    parse actions to the same parsing pattern.

    If ``copyDefaultWhiteChars`` is set, the copy's whitespace characters are
    reset from the current ``ParserElement.DEFAULT_WHITE_CHARS``.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # detach the list attributes so later additions do not affect the original
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate

550 

def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> "ParserElement":
    """
    Attach a results name, so the matched tokens can be reached as a named
    attribute of the returned parse results.

    Results names normally behave like dict keys: a later value replaces an
    earlier one. To keep every value captured under a name, pass
    ``list_all_matches`` = True (``listAllMatches`` is accepted as a
    keyword-only synonym).

    NOTE: ``set_results_name`` returns a *copy* of the original
    :class:`ParserElement` object, so that a basic element, such as an
    integer, can be referenced in multiple places with different names.

    The abbreviated syntax ``expr("name")`` may be used in place of
    ``expr.set_results_name("name")`` - see :class:`__call__`. If
    ``list_all_matches`` is required, use ``expr("name*")``.

    Example::

        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    keep_all = listAllMatches or list_all_matches
    return self._setResultsName(name, keep_all)

583 

def _setResultsName(self, name, listAllMatches=False):
    """
    Return a copy of this expression carrying ``name`` as its results name;
    with ``name`` of ``None`` the expression itself is returned unchanged.

    A trailing ``*`` on ``name`` is stripped and forces all matches to be
    listed, as if ``listAllMatches`` were True.
    """
    if name is None:
        return self
    named_copy = self.copy()
    collect_all = listAllMatches
    if name.endswith("*"):
        name, collect_all = name[:-1], True
    named_copy.resultsName = name
    named_copy.modalResults = not collect_all
    return named_copy

594 

def set_break(self, break_flag: bool = True) -> "ParserElement":
    """
    Method to invoke the Python pdb debugger when this element is
    about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
    disable.

    Enabling replaces ``self._parse`` with a wrapper that calls
    ``pdb.set_trace()`` and then the previous parse method; the previous
    method is remembered on the wrapper as ``_originalParseMethod`` so that
    disabling can restore it. Enabling an element whose parse method is
    already wrapped is a no-op, so a single disable call always restores the
    original method.

    Returns this element.
    """
    current_parse = self._parse
    already_wrapped = hasattr(current_parse, "_originalParseMethod")

    if break_flag:
        if already_wrapped:
            # do not stack a second wrapper: that would stop in pdb twice per
            # parse and leave a wrapper behind after set_break(False)
            return self

        def breaker(instring, loc, doActions=True, callPreParse=True):
            import pdb

            # this call to pdb.set_trace() is intentional, not a checkin error
            pdb.set_trace()
            return current_parse(instring, loc, doActions, callPreParse)

        breaker._originalParseMethod = current_parse  # type: ignore [attr-defined]
        self._parse = breaker  # type: ignore [assignment]
    elif already_wrapped:
        self._parse = current_parse._originalParseMethod  # type: ignore [attr-defined, assignment]
    return self

617 

def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Replace this expression's parse actions with ``fns`` - the actions to perform
    when the parse element definition is successfully matched.

    Parse actions can perform data conversions, do extra validation, update
    external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable with 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens arrive as ParseResults and may be modified in place, using
    list-style append, extend, and pop operations for the parsed elements, and
    dictionary-style item set and del operations for named results. Tokens
    modified in place do not need to be returned.

    A parse action may instead return a replacement: another ``ParseResults``
    object, or some entirely different object (common for parse actions that
    perform data conversions). A convenient way to build a new parse result is
    to define the values using a dict and pass it to :class:`ParseResults.from_dict`.

    If None is passed as the ``fn`` parse action, all previously added parse
    actions for this expression are cleared.

    Raises ``TypeError`` if any given action is not callable.

    Optional keyword arguments:

    - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during
      lookaheads and alternate testing. Parse actions with side effects should
      only run once it is known that they are part of a successful parse; parse
      actions that perform additional validation should pass True so that the
      validation is included in the preliminary "try" parses. (``callDuringTry``
      is accepted as a synonym.)

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None clears the actions and leaves callDuringTry untouched
    if list(fns) == [None]:
        self.parseAction = []
        return self

    for candidate in fns:
        if not callable(candidate):
            raise TypeError("parse actions must be callable")

    self.parseAction = [_trim_arity(action) for action in fns]
    legacy_setting = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_setting)
    return self

703 

def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Append one or more parse actions to this expression's existing list of
    parse actions. See :class:`set_parse_action` for the accepted call
    signatures and the ``call_during_try`` keyword; once ``callDuringTry``
    has been set truthy it is kept.

    See examples in :class:`copy`.
    """
    self.parseAction += [_trim_arity(action) for action in fns]
    if not self.callDuringTry:
        legacy_setting = kwargs.get("callDuringTry", False)
        self.callDuringTry = kwargs.get("call_during_try", legacy_setting)
    return self

715 

def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Each predicate is converted with :func:`condition_as_parse_action` and
    appended to ``parseAction``. Returns this expression.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception; when
      omitted (or ``None``), the default message of ``condition_as_parse_action`` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    # Only stringify a message that was actually supplied: str(None) would yield
    # the literal "None" and hide the default message of condition_as_parse_action.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self

752 

def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
    """
    Register the action to perform when parsing fails at this expression,
    replacing any previous one, and return this expression.

    The fail action is a callable taking the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately."""
    setattr(self, "failAction", fn)
    return self

768 

def _skipIgnorables(self, instring: str, loc: int) -> int:
    """
    Advance ``loc`` past every consecutive match of this element's ignore
    expressions and return the new location.

    Each ignore expression is applied repeatedly until it raises
    ``ParseException``; the whole set is then retried for as long as a pass
    moves the location. A match that does not advance the location ends the
    repetition of that expression, since retrying at the same location would
    never terminate.
    """
    if not self.ignoreExprs:
        return loc
    ignore_expr_fns = [e._parse for e in self.ignoreExprs]
    last_loc = loc
    exprs_found = True
    while exprs_found:
        exprs_found = False
        for ignore_fn in ignore_expr_fns:
            try:
                while True:
                    next_loc, _ = ignore_fn(instring, loc)
                    exprs_found = True
                    if next_loc == loc:
                        # zero-width match: stop repeating this expression
                        break
                    loc = next_loc
            except ParseException:
                pass
        # check if all ignore exprs matched but didn't actually advance the parse location
        if loc == last_loc:
            break
        last_loc = loc
    return loc

789 

def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should start: ``loc`` moved past
    any ignorable expressions and then, if ``skipWhitespace`` is set, past
    any run of this element's whitespace characters.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc

801 

def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: consume nothing and return ``loc`` with a new
    empty token list."""
    no_tokens = []
    return loc, no_tokens

804 

def postParse(self, instring, loc, tokenlist):
    """Base implementation: hand back ``tokenlist`` unchanged."""
    unchanged = tokenlist
    return unchanged

807 

808 # @profile 

def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Core (uncached) parse step for this element.

    Sequence: optional ``preParse`` -> ``parseImpl`` -> ``postParse`` -> wrap the
    tokens in a :class:`ParseResults` -> run the parse actions (only when
    ``doActions`` or ``self.callDuringTry`` is truthy) -> return
    ``(loc, ret_tokens)``.

    An ``IndexError`` raised by ``parseImpl`` inside the guarded call is turned
    into a ``ParseException`` located at the end of ``instring``. When
    ``self.debug`` or ``self.failAction`` is set, failures are reported to the
    ``debug_fail`` callback and/or the fail action before being re-raised.
    """
    # NOTE: these three constants are not referenced anywhere else in this method
    TRY, MATCH, FAIL = 0, 1, 2
    debugging = self.debug  # and doActions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # instrumented path: same parsing steps as the plain path below, wrapped
        # so that debug/fail callbacks see any exception
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            # NOTE(review): tokens_start is assigned only after preParse returns, so
            # it looks unbound here if preParse itself raised - confirm
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # plain path: no debug or fail callbacks to notify
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        # NOTE(review): the message is passed as the first positional
                        # argument here, unlike the (instring, loc, msg, elem) calls
                        # above - confirm intended
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns something other than None or the
                    # same results object replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            # same parse-action loop as above, without the debug_fail reporting
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

910 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the location where the match ended.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param raise_fatal: if true, a ``ParseFatalException`` is re-raised unchanged
    :param do_actions: forwarded to ``_parse`` as ``doActions``
    :raises ParseException: when a ``ParseFatalException`` occurs and ``raise_fatal``
        is false (other exceptions from ``_parse`` propagate untouched)
    """
    try:
        parsed = self._parse(instring, loc, doActions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            # downgrade the fatal error to an ordinary failure located at ``loc``
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return parsed[0]

925 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    Delegates to ``try_parse``; a ``ParseException`` or ``IndexError`` raised there
    is reported as ``False``, any other exception propagates.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True

933 

# cache for left-recursion in Forward references
# re-entrant lock object created alongside the memo table below
recursion_lock = RLock()
# Memo table, empty by default. Per the annotation, keys are
# (int, Forward, bool) tuples and values are (int, ParseResults-or-Exception)
# tuples. enable_left_recursion() rebinds this attribute to a memo object and
# reset_cache() clears it.
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
] = {}

939 

class _CacheType(dict):
    """
    Placeholder cache used purely to help type checking.

    It declares the members the packrat code reads from a cache object
    (``not_in_cache``, ``get`` and ``set``); the methods here do nothing.
    """

    # declared only; ``_parseCache`` compares lookup results against this attribute
    not_in_cache: bool

    def get(self, *args):
        """No-op stand-in for a cache lookup; always returns ``None``."""

    def set(self, *args):
        """No-op stand-in for a cache store; always returns ``None``."""

952 

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = (
    _CacheType()
)  # set later by enable_packrat(); this is here so that reset_cache() doesn't fail
# re-entrant lock; _parseCache holds it for the whole lookup-and-parse
packrat_cache_lock = RLock()
# two counters, indexed by _parseCache as [HIT, MISS]; reset_cache() zeroes them in place
packrat_cache_stats = [0, 0]

959 

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Memoizing front end for ``_parseNoCache``.

    The cache key is ``(self, instring, loc, callPreParse, doActions)``.  On a
    miss the uncached parse runs and either its result or a traceback-free copy
    of the raised ``ParseBaseException`` is stored.  On a hit the stored
    exception is re-raised, or a copy of the stored results is returned.

    When ``self.debug`` is set, the configured debug actions are invoked on a
    hit with ``cache_hit=True``; a ``TypeError`` from such a call is ignored
    (presumably for user actions that do not accept ``cache_hit``).

    :returns: ``(end location, ParseResults)``
    :raises ParseBaseException: if the parse fails, or a cached failure is replayed
    """
    HIT, MISS = 0, 1  # indexes into ParserElement.packrat_cache_stats
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # stored as (end location, private copy of results, start location)
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(Tuple[int, ParseResults, int], value)
            # hand out a fresh copy so callers cannot mutate the cached results
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # report (start, end) in the same order as the uncached path;
                    # the cached tuple keeps the end location first
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result

1010 

# default parse entry point is the uncached implementation; enable_packrat()
# rebinds it to _parseCache and disable_memoization() restores it
_parse = _parseNoCache

1012 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache and the left-recursion memo table, and zero the
    packrat hit/miss counters in place (the stats list object is kept).
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()

1020 

# class-wide memoization flags, both off by default; enable_packrat() and
# enable_left_recursion() set them and refuse to run while the other is set
_packratEnabled = False
_left_recursion_enabled = False

1023 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing together with their memoization.

    Calling this when neither mode is active is harmless, so it can be used
    before activating Packrat or Left Recursion to clear any previous settings.
    """
    ParserElement.reset_cache()
    # restore the uncached parse entry point and clear both mode flags
    ParserElement._parse = ParserElement._parseNoCache
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False

1037 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Switch on "bounded recursion" parsing, which supports direct and indirect
    left-recursion.  While parsing, left-recursive :class:`Forward` elements are
    matched repeatedly with a fixed recursion depth that is raised step by step
    until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    Matches of ``Forward`` elements are memoized as part of the recursion search,
    so parse actions may not be re-run during backtracking.  Programs whose parse
    actions depend on a strict order of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - upper bound on the number of
      memoized ``Forward`` elements; ``None`` memoizes all of them.  Values
      that are not greater than zero raise ``NotImplementedError``.
    - ``force`` - (default=``False``) - call :class:`disable_memoization` first
      instead of raising ``RuntimeError`` when Packrat is already enabled.

    Bounded Recursion and Packrat parsing are similar but not identical and
    cannot be combined.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError("Memo size of %s" % cache_size_limit)

    ParserElement.recursion_memos = memo  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True

1085 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Switch on "packrat" parsing, i.e. memoization inside the parsing logic.
    A parse attempt repeated at the same string location (common in complex
    grammars) returns the cached outcome instead of re-running the parsing and
    validating code.  Both successful results and parse exceptions are cached.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the size of
      the packrat cache; ``None`` makes the cache unbounded; ``0`` effectively
      disables caching.
    - ``force`` - (default= ``False``) - call :class:`disable_memoization` first
      instead of raising ``RuntimeError`` when Bounded Recursion is enabled.

    Because cached results skip re-execution, programs whose parse actions have
    side-effects may break, which is why packrat parsing is off when pyparsing
    is first imported.  Activate it by calling
    :class:`ParserElement.enable_packrat`, ideally immediately after importing
    pyparsing.  If packrat is already enabled, the call leaves the existing
    cache untouched.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat and Bounded Recursion parsing are similar but not identical and
    cannot be combined.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    ParserElement.packrat_cache = (  # type: ignore[assignment]
        _UnboundedCache() if cache_size_limit is None else _FifoCache(cache_size_limit)
    )
    ParserElement._parse = ParserElement._parseCache

1131 

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` from its start using this parser definition.  This is
    intended as the primary interface for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if the input does not match the grammar, or if
      ``parse_all`` is set and the grammar does not consume the whole input.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    Setting ``parse_all`` is equivalent to ending the grammar with
    :class:`StringEnd`.

    Unless ``parse_with_tabs`` was called on the grammar, parsing operates on a
    copy of the input in which tabs are expanded with ``str.expandtabs()``
    (8 columns per tab stop by default), so that column numbers are reported
    properly.  If the input contains tabs and parse actions use ``loc`` to index
    into the string, keep a consistent view of the input by doing one of:

    - calling ``parse_with_tabs`` on the grammar before ``parse_string``,
    - defining parse actions with the full ``(s,loc,toks)`` signature and
      indexing into the ``s`` argument, or
    - expanding the tabs yourself before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    With ``parse_all`` set, unmatched trailing input is an error.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b'  (at char 5), (line:1, col:6)
    """
    must_consume_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignored in self.ignoreExprs:
        ignored.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if must_consume_all:
            # skip trailing ignorables, then require end of input
            end_loc = self.preParse(instring, end_loc)
            string_end = Empty() + StringEnd()
            string_end._parse(instring, end_loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    return tokens

1200 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found.  If
    ``overlap`` is specified, then overlapping matches will be reported.

    :param instring: the string to scan
    :param max_matches: stop after this many matches
    :param overlap: change how the scan position advances after a match
    :param debug: if true, print a dict of tokens/start/end for every match
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of the
        two values is used
    :returns: a generator of ``(tokens, start, end)`` tuples

    Note that the start and end locations are reported relative to the string
    being parsed (tabs are expanded first unless ``keepTabs`` is set).  See
    :class:`parse_string` for more information on parsing strings with
    embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every loop iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character past the pre-parsed position
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): when pre-parsing skips something at loc
                        # the scan jumps to the end of the match, otherwise it
                        # advances a single character - confirm this is intended
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not move past loc; step on to avoid looping forever
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)

1291 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matched text with the tokens
    produced for each match (typically modified by a parse action).  Define a
    grammar, attach a parse action that changes the returned token list, then
    call ``transform_string()`` on a target string: every match is replaced by
    its tokens and the resulting string is returned.

    Note that this sets ``keepTabs`` on the expression and does not reset it.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # text between the previous match and this one is copied through
            pieces.append(instring[prev_end:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.as_list())
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        kept = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(kept))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1337 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`: collect just the tokens of every
    match of this expression into one :class:`ParseResults`.  The optional
    ``max_matches`` argument clips searching after 'n' matches are found
    (``maxMatches`` is the pre-PEP8 spelling; the smaller value wins).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            match[0] for match in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1377 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits ``instring`` wherever this expression matches.
    ``maxsplit`` limits the number of splits, and ``include_separators``
    (default= ``False``; ``includeSeparators`` is the pre-PEP8 spelling) also
    yields the first token of each separator match between the pieces.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    keep_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if keep_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator (possibly an empty string)
    yield instring[cursor:]

1409 

def __add__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator - returns :class:`And`.  A string operand
    is first converted with ``self._literalStringClass`` (a :class:`Literal` by
    default).  Operands that are neither strings nor parser elements yield
    ``NotImplemented``.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1445 

def __radd__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` builds ``SkipTo(expr)("_skipped*") + expr``; a string is
    converted with ``self._literalStringClass``; any other non-parser operand
    yields ``NotImplemented``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1458 

def __sub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string operand is converted with ``self._literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1468 

def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``self._literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1478 

def __mul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    ``...`` (Ellipsis) is accepted wherever ``None`` is.  Returns
    ``NotImplemented`` for operands that are neither an int nor a tuple of
    ints/``None``; raises ``ValueError`` for a negative count or a maximum
    smaller than the minimum.
    """
    # normalise Ellipsis forms: `...` -> (0, None); (..., n) -> (0, n); (...,) -> (0, None)
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        # exactly `other` repetitions, no optional ones
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # replace any remaining Ellipsis with None, then pad/clip to two items
        other = tuple(o if o is not Ellipsis else None for o in other)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            # open-ended upper bound
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # convert the maximum into a count of optional repetitions
            optElements -= minElements
        else:
            return NotImplemented
    else:
        return NotImplemented

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if minElements == optElements == 0:
        return And([])

    if optElements:

        # builds nested optionals: Opt(self + Opt(self + ... Opt(self))), n levels deep
        def makeOptionalList(n):
            if n > 1:
                return Opt(self + makeOptionalList(n - 1))
            else:
                return Opt(self)

        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            # a single required repetition is the expression itself (not a copy)
            ret = self
        else:
            ret = And([self] * minElements)
    return ret

1557 

def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator when the left operand is not a
    :class:`ParserElement`; forwards to ``__mul__`` with the same operand.
    """
    return self.__mul__(other)

1560 

def __or__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a pending skip that must skip; ``expr | ""`` returns
    ``Opt(expr)``; any other string is converted with
    ``self._literalStringClass``.  Operands that are neither strings nor parser
    elements yield ``NotImplemented``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented

1576 

def __ror__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``self._literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1586 

def __xor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator - returns :class:`Or`.

    A string operand is converted with ``self._literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1596 

def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``self._literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1606 

def __and__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator - returns :class:`Each`.

    A string operand is converted with ``self._literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1616 

def __rand__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``self._literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1626 

def __invert__(self) -> "ParserElement":
    """
    Implementation of ``~`` operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1632 

# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence
# (with __iter__ set to None, iter(expr) raises TypeError instead of falling
# back to calling __getitem__ with 0, 1, 2, ...)
__iter__ = None

1636 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
         to ``expr*n + ZeroOrMore(expr)``
         (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
         (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` when more than two index arguments are given.
    """

    stop_on_defined = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[a : stop] - the slice start is the repetition key, the slice stop is stop_on
        key, stop_on = key.start, key.stop
        if key is None:
            key = ...
        stop_on_defined = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[m, n : stop] - reads the slice from position 1 of the tuple
        key, stop_on = (key[0], key[1].start), key[1].stop
        stop_on_defined = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # a non-iterable key n becomes (n, n)
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    ret = self * tuple(key[:2])
    # cast is for the type checker only; it performs no runtime conversion
    ret = typing.cast(_MultipleMatch, ret)

    if stop_on_defined:
        ret.stopOn(stop_on)

    return ret

1696 

def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted (or ``None``), same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)

1716 

def suppress(self) -> "ParserElement":
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that its
    output is suppressed; useful to keep punctuation from cluttering up
    returned output.
    """
    suppressed = Suppress(self)
    return suppressed

1723 

def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern, and return ``self``.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any).
        This implementation does not consult the flag.
    """
    setattr(self, "skipWhitespace", True)
    return self

1733 

def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern, and return ``self``.  This is
    normally only used internally by the pyparsing module, but may be needed in
    some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any).
        This implementation does not consult the flag.
    """
    setattr(self, "skipWhitespace", False)
    return self

1744 

def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Overrides the default whitespace chars for this expression.

    :param chars: the characters to treat as whitespace; stored as a ``set``
    :param copy_defaults: value stored in ``copyDefaultWhiteChars``
    :return: this expression, so calls can be chained
    """
    # setting explicit whitespace characters also switches whitespace skipping on
    self.skipWhitespace = True
    white_chars = set(chars)
    self.whiteChars = white_chars
    self.copyDefaultWhiteChars = copy_defaults
    return self

1755 

def parse_with_tabs(self) -> "ParserElement":
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before parsing the
    input string, by setting ``keepTabs``.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    :return: this expression, so calls can be chained
    """
    self.keepTabs = True
    return self

1764 

def ignore(self, other: Union["ParserElement", str]) -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    :param other: expression to ignore; a plain string is wrapped in a
        :class:`Suppress` first
    :return: this expression, so calls can be chained

    Example::

        patt = Word(alphas)[1, ...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if isinstance(other, Suppress):
        # an already-suppressed expression is registered as-is, and only once
        if other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
    else:
        # any other expression is copied and wrapped so that it produces no tokens
        self.ignoreExprs.append(Suppress(other.copy()))
    return self

1792 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching,
    and turn debugging on for this expression:

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

    A falsy value for any of the three selects the corresponding default debug action.
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = True
    return self

1818 

def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Switch the display of debugging messages during pattern matching on or off.
    Pass ``flag`` as ``True`` to enable, ``False`` to disable.
    Pass ``recurse`` as ``True`` to set the debug flag on this expression and all sub-expressions.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if recurse:
        # apply the flag to every expression yielded by visit_all(), one level at a time
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling always (re)installs the default debug actions
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

1871 

@property
def default_name(self) -> str:
    """
    The auto-generated name of this expression; built by ``_generateDefaultName()``
    when ``_defaultName`` is ``None`` and cached there for later accesses.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1877 

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The ``default_name`` property calls this method whenever ``_defaultName`` is ``None``
    and stores the returned string there.
    """

1883 

def set_name(self, name: str) -> "ParserElement":
    """
    Assign a name to this expression, to make debugging and exception messages clearer.

    Example::

        Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
        Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # rebuild the error message so that it reflects the new name
    self.errmsg = "Expected " + self.name
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.set_debug()
    return self

1898 

@property
def name(self) -> str:
    """The user-defined name if one has been set, otherwise the auto-generated ``default_name``."""
    custom = self.customName
    if custom is not None:
        return custom
    return self.default_name

1903 

1904 def __str__(self) -> str: 

1905 return self.name 

1906 

1907 def __repr__(self) -> str: 

1908 return str(self) 

1909 

def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and clear the cached default name, so
    that ``default_name`` regenerates it on next access.

    :return: this expression, so calls can be chained
    """
    self._defaultName = None
    self.streamlined = True
    return self

1914 

def recurse(self) -> List["ParserElement"]:
    """Return the list of contained sub-expressions; this implementation has none."""
    return []

1917 

1918 def _checkRecursion(self, parseElementList): 

1919 subRecCheckList = parseElementList[:] + [self] 

1920 for e in self.recurse(): 

1921 e._checkRecursion(subRecCheckList) 

1922 

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: emits a ``DeprecationWarning`` on every call before running the
    recursion check. ``validateTrace`` is accepted but not used.
    """
    deprecation_msg = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

1933 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run the parse expression against the contents of the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    :param file_or_filename: an object with a ``read()`` method, or a path to open
    :param encoding: text encoding used when a path has to be opened
    :param parse_all: passed on to ``parse_string``; ``parseAll`` is its
        compatibility synonym, and either one being true enables it
    """
    must_parse_all = parseAll or parse_all
    try:
        # treat the argument as a file object first
        contents = typing.cast(TextIO, file_or_filename).read()
    except AttributeError:
        # no usable read(): treat the argument as a path
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as source:
            contents = source.read()
    try:
        return self.parse_string(contents, must_parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1963 

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object compares equal;
    - a string compares equal when this expression matches it in full (``parse_all=True``);
    - another :class:`ParserElement` compares equal when both have equal instance attributes;
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

1972 

1973 def __hash__(self): 

1974 return id(self) 

1975 

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests;
      ``parseAll`` is its compatibility synonym, and both must be true for the flag to be passed as true

    Returns ``True`` if parsing succeeds, ``False`` if a ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    must_parse_all = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=must_parse_all)
    except ParseBaseException:
        return False
    else:
        return True

1999 

def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output; if it
      returns ``None``, the parsed results are dumped instead (honoring ``full_dump``)
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each snake_case argument with its camelCase compatibility synonym
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a single multiline string: one stripped test (or comment) per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    comments: List[str] = []
    success = True
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    for t in tests:
        # comment lines (and blank lines inside a run of comments) are collected
        # and emitted in front of the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            "\n" + "\n".join(comments) if comments else "",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            out.append(pe.explain())
            out.append("FAIL: " + str(pe))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # nothing returned by the callback: fall back to the normal
                        # dump, with the same full/short setting as every other dump
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # not every callable has __name__ (e.g. functools.partial)
                    pp_name = getattr(postParse, "__name__", repr(postParse))
                    out.append(f"{pp_name} failed: {type(e).__name__}: {e}")
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2198 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )
    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    # a path was given: create or overwrite the file as UTF-8
    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))

2253 

# Compatibility synonyms
#
# Each camelCase stub below is decorated with replaced_by_pep8(<snake_case method>);
# the stub bodies are intentionally just `...`.
# NOTE(review): replaced_by_pep8 is imported from .util and not shown here - presumably it
# swaps each stub for a wrapper that forwards to the named snake_case method; confirm there.
# fmt: off
@staticmethod
@replaced_by_pep8(inline_literals_using)
def inlineLiteralsUsing(): ...

@staticmethod
@replaced_by_pep8(set_default_whitespace_chars)
def setDefaultWhitespaceChars(): ...

@replaced_by_pep8(set_results_name)
def setResultsName(self): ...

@replaced_by_pep8(set_break)
def setBreak(self): ...

@replaced_by_pep8(set_parse_action)
def setParseAction(self): ...

@replaced_by_pep8(add_parse_action)
def addParseAction(self): ...

@replaced_by_pep8(add_condition)
def addCondition(self): ...

@replaced_by_pep8(set_fail_action)
def setFailAction(self): ...

@replaced_by_pep8(try_parse)
def tryParse(self): ...

@staticmethod
@replaced_by_pep8(enable_left_recursion)
def enableLeftRecursion(): ...

@staticmethod
@replaced_by_pep8(enable_packrat)
def enablePackrat(): ...

@replaced_by_pep8(parse_string)
def parseString(self): ...

@replaced_by_pep8(scan_string)
def scanString(self): ...

@replaced_by_pep8(transform_string)
def transformString(self): ...

@replaced_by_pep8(search_string)
def searchString(self): ...

@replaced_by_pep8(ignore_whitespace)
def ignoreWhitespace(self): ...

@replaced_by_pep8(leave_whitespace)
def leaveWhitespace(self): ...

@replaced_by_pep8(set_whitespace_chars)
def setWhitespaceChars(self): ...

@replaced_by_pep8(parse_with_tabs)
def parseWithTabs(self): ...

@replaced_by_pep8(set_debug_actions)
def setDebugActions(self): ...

@replaced_by_pep8(set_debug)
def setDebug(self): ...

@replaced_by_pep8(set_name)
def setName(self): ...

@replaced_by_pep8(parse_file)
def parseFile(self): ...

@replaced_by_pep8(run_tests)
def runTests(self): ...

# plain aliases: these camelCase names are bound to the very same objects
canParseNext = can_parse_next
resetCache = reset_cache
defaultName = default_name
# fmt: on

2336 

2337 

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element;
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render the anchor followed by a stand-in Empty, then show the stand-in as '...'
        anchored = self.anchor + Empty()
        return str(anchored).replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched: drop the skipped text when nothing (or only "") was skipped
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match: if the last skipped entry is empty, record the missing anchor instead
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = "missing <" + repr(self.anchor) + ">"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a bare placeholder can never be parsed; it must first be combined via `+`
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2377 

2378 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, the base for atomic
    matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # by default a token is named after its concrete class
        cls = type(self)
        return cls.__name__

2389 

2390 

class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fail at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2404 

2405 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: select a subclass with optimized parseImpl.
        # Only a direct Literal(...) call is redirected; subclasses build themselves.
        target_cls = cls
        if cls is Literal:
            text = matchString or match_string
            if not text:
                target_cls = Empty
            elif len(text) == 1:
                target_cls = _SingleCharLiteral

        return super().__new__(target_cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        # slicing (rather than indexing) yields "" for an empty match string
        self.firstMatchChar = text[:1]
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # compare the first character before doing the full startswith() test
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2457 

2458 

class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString=""):
        # both arguments are accepted for signature compatibility with Literal
        # but ignored: the match string is always ""
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, doActions=True):
        # consume nothing and produce no tokens
        return loc, []

2474 

2475 

class _SingleCharLiteral(Literal):
    # Literal variant for one-character match strings: a single character
    # comparison replaces the startswith() test
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2481 

2482 

# Install Literal as ParserElement's _literalStringClass, now that Literal is defined.
# NOTE(review): presumably this is the class used to promote plain strings to
# expressions - confirm against ParserElement.
ParserElement._literalStringClass = Literal

2484 

2485 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Raises ``ValueError`` if the keyword string is empty.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # camelCase keyword arguments are compatibility synonyms for the snake_case ones
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            # the IndexError is an implementation detail; do not chain it
            raise ValueError(
                "null string passed to Keyword; use Empty() instead"
            ) from None
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done in upper case on both sides
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at ``loc``, requiring that it is neither preceded nor
        followed by a character in ``identChars``.

        Returns ``(loc + matchLen, match)`` on success; otherwise raises
        ``ParseException`` whose message and location say whether the text did not
        match at all, or matched but was adjacent to a keyword character.
        """
        errmsg = self.errmsg
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match
                    else:
                        # followed by keyword char
                        errmsg += (
                            ", keyword was immediately followed by keyword character"
                        )
                        errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        else:
            if (
                instring[loc] == self.firstMatchChar
                and self.matchLen == 1
                or instring.startswith(self.match, loc)
            ):
                if loc == 0 or instring[loc - 1] not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen] not in self.identChars
                    ):
                        return loc + self.matchLen, self.match
                    else:
                        # followed by keyword char
                        errmsg += (
                            ", keyword was immediately followed by keyword character"
                        )
                        errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    setDefaultKeywordChars = set_default_keyword_chars

2602 

2603 

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        as_given = matchString or match_string
        # the inherited ``match`` holds the upper-cased form used for comparisons
        super().__init__(as_given.upper())
        # Preserve the defining literal.
        self.returnString = as_given
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        candidate = instring[loc : loc + self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString

2629 

2630 

class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # resolve the camelCase compatibility synonyms, then defer to Keyword
        # with caseless matching switched on
        keyword_text = matchString or match_string
        keyword_chars = identChars or ident_chars
        super().__init__(keyword_text, keyword_chars, caseless=True)

2654 

2655 

class CloseMatch(Token):
    """A relative of :class:`Literal` that tolerates a bounded number of
    character mismatches between the input and the target string.

    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    On success the results hold the text matched from the input string,
    plus these named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    An empty ``mismatches`` list means the input matched exactly.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        super().__init__()
        # an explicit snake_case value (even 0) overrides the camelCase default
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.match_string!r}"

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with ``match_string`` character by
        character, collecting mismatch positions; raise
        :class:`ParseException` when the input is too short or the
        mismatch budget is exceeded.
        """
        start = loc
        target = self.match_string
        end = start + len(target)

        # not enough input left to hold the whole target
        if end > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        fold_case = self.caseless
        budget = self.maxMismatches
        mismatches = []
        idx = 0

        for idx, (src, mat) in enumerate(zip(instring[start:end], target)):
            if fold_case:
                src, mat = src.lower(), mat.lower()
            if src == mat:
                continue
            mismatches.append(idx)
            if len(mismatches) > budget:
                # too many differences - report failure at the starting location
                raise ParseException(instring, loc, self.errmsg, self)

        # every compared character was within budget
        loc = start + idx + 1
        results = ParseResults([instring[start:loc]])
        results["original"] = target
        results["mismatches"] = mismatches
        return loc, results

2741 

2742 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        """Build the character sets and length limits and, when possible,
        a compiled regular expression used for matching.

        Raises ``ValueError`` if the initial character string is empty,
        if ``min`` is less than 1, or if ``max`` is given and ``min``
        exceeds it.
        """
        # the camelCase keyword arguments take precedence whenever they are truthy
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            # remove the excluded characters from both the initial and body sets
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no body characters given (or none left after exclusion):
            # the body shares the initial character set object
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # records whether the caller passed an explicit max; an ``exact``
        # value alone does not set this flag
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # exact overrides min and max; the locals are rebound as well
            # because the regex construction below reads them
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        # (only attempted when neither character set contains a space)
        if " " not in (self.initChars | self.bodyChars):
            # leading fragment: a single escaped char, or a character class
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # one character class covers the whole word; pick its repetition
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # separate leading and body classes; the leading fragment
                # consumes one character, so body counts are reduced by one
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                # keyword matching is expressed with regex word boundaries
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # pattern could not be compiled; keep the pure-Python parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                # compiled successfully: this instance matches via the regex
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]

    def _generateDefaultName(self) -> str:
        """Return a default name of the form ``W:(init)`` or
        ``W:(init, body)``, followed by a length specification when the
        length limits differ from the defaults.
        """

        def charsAsStr(s):
            # collapse the characters to range notation and truncate long
            # results to 16 characters, the last three being "..."
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)
            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."
            else:
                return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # exactly one character: drop the leading "W:" prefix
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, doActions=True):
        """Pure-Python matcher, used when ``__init__`` did not install the
        regex-based implementation on the instance.

        Returns ``(new_loc, matched_text)`` or raises
        :class:`ParseException`.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            # too few characters matched
            throwException = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # an explicit max was given and the word continues past it
            throwException = True
        elif self.asKeyword:
            # keyword mode: fail if a body character immediately precedes
            # or follows the matched text
            if (
                start > 0
                and instring[start - 1] in bodychars
                or loc < instrlen
                and instring[loc] in bodychars
            ):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, doActions=True):
        """Regex-based matcher installed by ``__init__`` when the pattern
        compiled; returns ``(match_end, matched_text)`` or raises
        :class:`ParseException`.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

2994 

2995 

class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)`` for
    matching exactly one character drawn from a given string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # Legacy camelCase keywords take precedence when truthy; the word
        # length is pinned to a single character.
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )

3016 

3017 

class Regex(Token):
    r"""Token that matches input text against a regular expression.

    The expression is given either as a pattern string in the syntax of the
    stdlib Python `re module <https://docs.python.org/3/library/re.html>`_,
    or as an already-compiled pattern object. Named groups in the pattern
    (defined using ``(?P<name>...)``) are preserved as named
    :class:`ParseResults`.

    To use a different RE module than the stdlib ``re`` (such as the
    ``regex`` module), construct the ``Regex`` with a pattern object that
    was compiled by that module.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """``pattern`` and ``flags`` are handed unchanged to
        ``re.compile()`` when the pattern is a string. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        when ``pattern`` is neither a string nor an object exposing
        ``pattern`` and ``match`` attributes.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        given_as_text = isinstance(pattern, str_type)
        if not given_as_text and not (
            hasattr(pattern, "pattern") and hasattr(pattern, "match")
        ):
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )
        if given_as_text and not pattern:
            raise ValueError("null string passed to Regex; use Empty() instead")

        if given_as_text:
            # compilation is deferred to the ``re`` property
            self._re = None
            self.pattern = pattern
        else:
            # caller supplied a compiled pattern object; use it as-is
            self._re = pattern
            self.pattern = pattern.pattern
        self.reString = self.pattern
        self.flags = flags

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # choose the per-instance parse implementation; as_match wins when
        # both options are requested
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]
        elif self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]

    @cached_property
    def re(self):
        """The pattern object used for matching: the one supplied by the
        caller, or the result of compiling the pattern string with the
        stored flags. Raises ``ValueError`` if compilation fails.
        """
        supplied = self._re
        if supplied:
            return supplied
        try:
            return re.compile(self.pattern, self.flags)
        except re.error:
            raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the pattern object."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """True when the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        shown = repr(self.pattern).replace("\\\\", "\\")
        return "Re:({})".format(shown)

    def parseImpl(self, instring, loc, doActions=True):
        """Default implementation: return the matched text as
        :class:`ParseResults`, with one named result per named group.
        """
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        ret = ParseResults(found.group())
        for group_name, group_value in found.groupdict().items():
            ret[group_name] = group_value
        return found.end(), ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Implementation used with ``as_group_list``: return the tuple of
        the match's groups.
        """
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.groups()

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Implementation used with ``as_match``: return the match object
        itself.
        """
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found

    def sub(self, repl: str) -> ParserElement:
        r"""
        Attach a parse action that transforms the parsed result as if
        `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_
        had been called, and return the result of attaching it.

        Raises ``TypeError`` when used with ``as_group_list=True``, or with
        a callable ``repl`` combined with ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch:
            if callable(repl):
                raise TypeError(
                    "cannot use sub() with a callable with Regex(as_match=True)"
                )

            # tokens[0] is a match object here; expand the template on it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # tokens[0] is the matched text; run the substitution over it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3171 

3172 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    # maps the two-character escape sequences (backslash + letter) to the
    # whitespace characters they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """Validate the quote delimiters and build the regular expressions
        used for matching and for unquoting.

        Raises ``ValueError`` if the opening or closing quote string is
        empty after stripping whitespace, or if the assembled pattern does
        not compile.
        """
        super().__init__()
        # merge the camelCase and snake_case forms: string options take the
        # camelCase value when truthy; the two boolean options are enabled
        # only when both spellings are truthy
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        # an empty-string esc_char still counts as "has an escape char" here
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern = []

        if esc_quote:
            # alternative: the literal escaped-quote sequence
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            # alternative: the escape character followed by any one character
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # alternative for multi-character end quotes: a proper prefix of
            # the end quote that is NOT followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        # final alternative: any single character other than the first
        # end-quote character and the escape character (if one was given);
        # newline and carriage return are also excluded unless multiline
        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
            )
        else:
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
            )

        # full pattern: opening quote, zero or more inner alternatives, closing quote
        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            # scanner used by parseImpl to rebuild the unquoted text piece by
            # piece; the group numbering here must stay in step with the
            # group handling in parseImpl
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            # NOTE(review): the message names "Regex" although it is raised
            # from QuotedString - confirm whether that wording is intended
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        """Return a descriptive default name built from the opening and
        closing quote strings.
        """
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(new_loc, text)``; ``text`` has the delimiters removed
        and escapes resolved when ``unquote_results`` is set, otherwise it
        is the raw matched text including the quotes. Raises
        :class:`ParseException` when there is no match.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            instring[loc] == self.first_quote_char
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped characters
                        else match.group(2)[-1] if match.group(2)
                        # match group 3 matches any character
                        else match.group(3)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                # (each esc_quote sequence becomes the end-quote string)
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3392 

3393 

class CharsNotIn(Token):
    """Token matching a run of characters, none of which appear in a given
    exclusion string. Whitespace is included in the match unless it is
    listed in the exclusion string (see example). Optional minimum,
    maximum, and/or exact lengths may be given. ``min`` defaults to 1
    (a minimum value < 1 is not valid); ``max`` and ``exact`` default to
    0, meaning no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        # leading whitespace is not skipped; it may be part of the match
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the collapsed range form only decides whether to truncate; the
        # displayed text is taken from the raw exclusion string
        collapsed = _collapse_string_to_ranges(self.notChars)
        shown = self.notChars
        if len(collapsed) > 16:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` up to the first excluded
        character, the maximum length, or the end of the input; raise
        :class:`ParseException` when the first character is excluded or
        fewer than ``minLen`` characters were consumed.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3472 

3473 

class White(Token):
    """Matching class for significant whitespace. pyparsing grammars
    normally ignore whitespace; use this class where some whitespace
    structure matters. Define with a string containing the whitespace
    characters to be matched; default is ``" \\t\\r\\n"``. Also takes
    optional ``min``, ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # the characters skipped before matching are every known whitespace
        # character that is NOT one of the characters to be matched
        skippable = [c for c in self.whiteStrs if c not in ws]
        self.set_whitespace_chars("".join(skippable), copy_defaults=True)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        # concatenate the symbolic names of the characters being matched
        labels = White.whiteStrs
        return "".join([labels[c] for c in self.matchWhite])

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters starting
        at ``loc``; raise :class:`ParseException` when the first character
        does not qualify or the run is shorter than ``minLen``.
        """
        accepted = self.matchWhite
        if instring[loc] not in accepted:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] in accepted:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3549 

3550 

class PositionToken(Token):
    """Base class for the position-based tokens defined in this module
    (e.g. :class:`GoToColumn`, :class:`LineStart`, :class:`LineEnd`).
    """

    def __init__(self):
        super().__init__()
        # flags shared by every position token
        self.mayIndexError = False
        self.mayReturnEmpty = True

3556 

3557 

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; useful
    for tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and then whitespace until the target
        column, a non-space character, or the end of the input is reached.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column; raise
        :class:`ParseException` when ``loc`` is already past that column.
        """
        gap = self.col - col(loc, instring)
        if gap < 0:
            raise ParseException(instring, loc, "Text not in expected column", self)
        newloc = loc + gap
        return newloc, instring[loc:newloc]

3587 

3588 

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed from it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper used by preParse to skip whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring: str, loc: int) -> int:
        """Skip leading whitespace via ``self.skipper``; when ``"\n"`` was in
        the original whitespace set, also step over newline characters.
        """
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3634 

3635 

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # "\n" must not be skipped as whitespace, since it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returning ``"\\n"``) or the end of the
        input (returning no tokens); anything else raises ``ParseException``.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3657 

3658 

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at location 0, or at the location that
        ``preParse`` reaches when started from location 0.
        """
        # see if entire string up to here is just whitespace and ignoreables
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3674 

3675 

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is at or beyond the end of
        ``instring``; raise ``ParseException`` when input remains.

        Exactly at the end, the returned location is ``loc + 1``; beyond the
        end, ``loc`` is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end is the only remaining case; the former trailing
        # ``else: raise`` branch could never execute and has been removed
        return loc, []

3694 

3695 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy keyword ``wordChars`` wins only when explicitly overridden
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at location 0, or where the previous
        character is not a word character and the current one is.
        """
        if loc == 0:
            return loc, []

        chars = self.wordChars
        if instring[loc - 1] in chars or instring[loc] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3720 

3721 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy keyword ``wordChars`` wins only when explicitly overridden
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at or past the end of input, or where the
        current character is not a word character and the previous one is.
        """
        size = len(instring)
        if size == 0 or loc >= size:
            return loc, []

        chars = self.wordChars
        if instring[loc] in chars or instring[loc - 1] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3747 

3748 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepts a single string, a single ``ParserElement``, a generator, or
        any iterable; strings are wrapped with ``self._literalStringClass``.
        A non-iterable value becomes a one-element list.
        """
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # if sequence of strings provided, wrap with Literal
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and reset the cached
        default name; returns ``self``.
        """
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so expressions shared with other parsers are untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so expressions shared with other parsers are untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable on this expression and on every
        contained expression.  A ``Suppress`` that is already registered is
        not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        # propagate the ignorable the base class just recorded
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{self.__class__.__name__}:({str(self.exprs)})"

    def streamline(self) -> ParserElement:
        """Streamline contained expressions and flatten directly nested
        expressions of the same class; a no-op once already streamlined.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def can_flatten(candidate) -> bool:
            # only merge nested expressions that carry no behavior of their own
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if can_flatten(other):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            # re-read the last element: the list may just have been rebuilt
            other = self.exprs[-1]
            if can_flatten(other):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = "Expected " + str(self)

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates every
        contained expression, then runs the recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this expression whose contained expressions are
        themselves copies.
        """
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, first warning (when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled
        and not suppressed) about contained expressions that already have one.
        """
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for e in self.exprs:
                if (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                ):
                    warnings.warn(
                        "{}: setting results name {!r} on {} expression "
                        "collides with {!r} on contained expression".format(
                            "warn_ungrouped_named_tokens_in_collection",
                            name,
                            type(self).__name__,
                            e.resultsName,
                        ),
                        stacklevel=3,
                    )

        return super()._setResultsName(name, listAllMatches)

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on

3920 

3921 

class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; once ``And.parseImpl`` passes it, later failures
        are raised as ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence, replacing each ``...`` placeholder with a
        ``SkipTo`` targeting the expression that follows it.
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            expanded = []
            last_index = len(exprs) - 1
            for index, item in enumerate(exprs):
                if item is not Ellipsis:
                    expanded.append(item)
                    continue
                if index >= last_index:
                    raise Exception(
                        "cannot construct And with sequence ending in ..."
                    )
                # Empty() + x converts x (possibly a str) into a ParserElement
                skipto_arg: ParserElement = typing.cast(
                    ParseExpression, (Empty() + exprs[index + 1])
                ).exprs[-1]
                expanded.append(SkipTo(skipto_arg)("_skipped*"))
            exprs[:] = expanded

        super().__init__(exprs, savelist)

        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take whitespace handling from the leading expression
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips with their following expression, streamline via
        the base class, then attach a parse action to the expression that
        precedes each ``IndentedBlock`` to record its column.
        """

        def ends_with_pending_skip(candidate):
            return (
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs:
            if any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
                deleted_expr_marker = NoMatch()
                for i, e in enumerate(self.exprs[:-1]):
                    if e is deleted_expr_marker:
                        continue
                    if ends_with_pending_skip(e):
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        self.exprs[i + 1] = deleted_expr_marker
                self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            visited = set()
            node = cur
            while id(node) not in visited:
                visited.add(id(node))
                if isinstance(node, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=node: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                first_child = next(iter(node.recurse()), None)
                if first_child is None:
                    break
                node = typing.cast(ParserElement, first_child)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order and return the final
        location with the accumulated tokens.
        """
        head, *tail = self.exprs
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = head._parse(instring, loc, doActions, callPreParse=False)

        past_error_stop = False
        for element in tail:
            # exact type match on purpose, not isinstance
            if type(element) is And._ErrorStop:
                past_error_stop = True
                continue

            if not past_error_stop:
                loc, exprtokens = element._parse(instring, loc, doActions)
            else:
                try:
                    loc, exprtokens = element._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        """Check contained expressions in order, stopping after the first one
        whose ``mayReturnEmpty`` is false.
        """
        trail = parseElementList[:] + [self]
        for element in self.exprs:
            element._checkRecursion(trail)
            if not element.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "{" + inner + "}"

4083 

4084 

class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline via the base class, then recompute the flags derived
        from the contained alternatives.
        """
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Trial-parse every alternative, then re-parse the successful ones
        from longest to shortest and return the first result that holds up.

        If nothing matches, raises the chosen ``ParseFatalException`` when any
        were collected, otherwise the ``ParseException`` with the greatest
        location.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                fatals.append(pfe)
                # a fatal error discards any ordinary failure recorded so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                _, best_expr = candidates[0]
                return best_expr._parse(instring, loc, doActions)

            longest = -1, None
            for trial_loc, alt in candidates:
                if trial_loc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    real_loc, toks = alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if real_loc >= trial_loc:
                        return real_loc, toks
                    # didn't match as much as before
                    if real_loc > longest[0]:
                        longest = real_loc, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: order by length of the element's string form
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any single error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " ^ ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, first warning (when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled
        and not suppressed) if any alternative is an ``And``.
        """
        diagnostic = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diagnostic not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And) and diagnostic not in e.suppress_warnings_
                for e in self.exprs
            ):
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

4243 

4244 

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline via the base class (once), then recompute the flags
        derived from the contained alternatives.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` propagates immediately.  If every
        alternative fails, the ``ParseException`` with the greatest location
        is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(
                    instring,
                    loc,
                    doActions,
                )
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " | ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, first warning (when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled
        and not suppressed) if any alternative is an ``And``.
        """
        diagnostic = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diagnostic not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And) and diagnostic not in e.suppress_warnings_
                for e in self.exprs
            ):
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

4359 

4360 

class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        # all() over an empty list is True, matching the no-expressions case
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups are built lazily on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        super().streamline()
        # all() over an empty list is True, matching the no-expressions case
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order they occur.

        A trial pass (``try_parse``) finds a workable order; the expressions
        are then parsed for real in that order.  Raises ``ParseException`` if
        required expressions remain unmatched, or the chosen
        ``ParseFatalException`` if the last trial round produced any.
        """
        if self.initExprGroups:
            wrapped_optionals = [e for e in self.exprs if isinstance(e, Opt)]
            # map the inner expression of each Opt back to the Opt itself
            self.opt1map = {id(e.expr): e for e in wrapped_optionals}
            opt1 = [e.expr for e in wrapped_optionals]
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ] + self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        multis = self.multioptionals[:]
        match_order = []
        fatals = []

        # keep sweeping over the outstanding expressions until a full sweep
        # matches nothing
        while True:
            candidates = pending_required + pending_optional + multis
            failures = 0
            fatals.clear()
            for expr in candidates:
                try:
                    scan_loc = expr.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = expr
                    fatals.append(pfe)
                    failures += 1
                except ParseException:
                    failures += 1
                else:
                    match_order.append(self.opt1map.get(id(expr), expr))
                    if expr in pending_required:
                        pending_required.remove(expr)
                    elif expr in pending_optional:
                        pending_optional.remove(expr)
            if failures == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: order by length of the element's string form
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order.extend(
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        )

        total_results = ParseResults([])
        for expr in match_order:
            loc, results = expr._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        joined = " & ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

4533 

4534 

4535class ParseElementEnhance(ParserElement): 

4536 """Abstract subclass of :class:`ParserElement`, for combining and 

4537 post-processing parsed tokens. 

4538 """ 

4539 

def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
    """Wrap ``expr``, converting a string into a parser element first, and
    copy the wrapped expression's parsing flags onto this element.
    """
    super().__init__(savelist)
    if isinstance(expr, str_type):
        expr_str = typing.cast(str, expr)
        literal_class = self._literalStringClass
        if issubclass(literal_class, Token):
            expr = literal_class(expr_str)  # type: ignore[call-arg]
        elif issubclass(type(self), literal_class):
            expr = Literal(expr_str)
        else:
            expr = literal_class(Literal(expr_str))  # type: ignore[assignment, call-arg]
    expr = typing.cast(ParserElement, expr)
    self.expr = expr
    if expr is None:
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.set_whitespace_chars(
        expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
    )
    # assigned after set_whitespace_chars, as in the original ordering
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)

4562 

4563 def recurse(self) -> List[ParserElement]: 

4564 return [self.expr] if self.expr is not None else [] 

4565 

4566 def parseImpl(self, instring, loc, doActions=True): 

4567 if self.expr is not None: 

4568 try: 

4569 return self.expr._parse(instring, loc, doActions, callPreParse=False) 

4570 except ParseBaseException as pbe: 

4571 if not isinstance(self, Forward) or self.customName is not None: 

4572 pbe.msg = self.errmsg 

4573 raise 

4574 else: 

4575 raise ParseException(instring, loc, "No expression defined", self) 

4576 

4577 def leave_whitespace(self, recursive: bool = True) -> ParserElement: 

4578 super().leave_whitespace(recursive) 

4579 

4580 if recursive: 

4581 if self.expr is not None: 

4582 self.expr = self.expr.copy() 

4583 self.expr.leave_whitespace(recursive) 

4584 return self 

4585 

4586 def ignore_whitespace(self, recursive: bool = True) -> ParserElement: 

4587 super().ignore_whitespace(recursive) 

4588 

4589 if recursive: 

4590 if self.expr is not None: 

4591 self.expr = self.expr.copy() 

4592 self.expr.ignore_whitespace(recursive) 

4593 return self 

4594 

4595 def ignore(self, other) -> ParserElement: 

4596 if isinstance(other, Suppress): 

4597 if other not in self.ignoreExprs: 

4598 super().ignore(other) 

4599 if self.expr is not None: 

4600 self.expr.ignore(self.ignoreExprs[-1]) 

4601 else: 

4602 super().ignore(other) 

4603 if self.expr is not None: 

4604 self.expr.ignore(self.ignoreExprs[-1]) 

4605 return self 

4606 

4607 def streamline(self) -> ParserElement: 

4608 super().streamline() 

4609 if self.expr is not None: 

4610 self.expr.streamline() 

4611 return self 

4612 

4613 def _checkRecursion(self, parseElementList): 

4614 if self in parseElementList: 

4615 raise RecursiveGrammarException(parseElementList + [self]) 

4616 subRecCheckList = parseElementList[:] + [self] 

4617 if self.expr is not None: 

4618 self.expr._checkRecursion(subRecCheckList) 

4619 

4620 def validate(self, validateTrace=None) -> None: 

4621 warnings.warn( 

4622 "ParserElement.validate() is deprecated, and should not be used to check for left recursion", 

4623 DeprecationWarning, 

4624 stacklevel=2, 

4625 ) 

4626 if validateTrace is None: 

4627 validateTrace = [] 

4628 tmp = validateTrace[:] + [self] 

4629 if self.expr is not None: 

4630 self.expr.validate(tmp) 

4631 self._checkRecursion([]) 

4632 

4633 def _generateDefaultName(self) -> str: 

4634 return f"{self.__class__.__name__}:({str(self.expr)})" 

4635 

4636 # Compatibility synonyms 

4637 # fmt: off 

4638 @replaced_by_pep8(leave_whitespace) 

4639 def leaveWhitespace(self): ... 

4640 

4641 @replaced_by_pep8(ignore_whitespace) 

4642 def ignoreWhitespace(self): ... 

4643 # fmt: on 

4644 

4645 

class IndentedBlock(ParseElementEnhance):
    """
    Match one or more occurrences of an expression that all start at the
    same indentation column. Intended for text whose structure is implied
    by indentation (such as Python source code).
    """

    class _Indent(Empty):
        """Zero-width element that only matches at column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda text, pos, toks: col(pos, text) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width element that only matches right of column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda text, pos, toks: col(pos, text) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column that a trailing un-indent is compared against
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        """Build and run, for the current position, an expression matching
        repeated ``self.expr`` occurrences at one indentation column."""
        # skip ahead to the first non-whitespace position; its column is
        # the reference for every following line of the block
        anchor_loc = Empty().preParse(instring, loc)

        # fail fast: if self.expr does not match here this raises, and
        # nothing else needs to be built
        self.expr.try_parse(instring, anchor_loc, do_actions=doActions)

        indent_col = col(anchor_loc, instring)
        same_column = self._Indent(indent_col)

        inner_expr = Empty() + same_column + self.expr
        if self._recursive:
            deeper_column = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_column + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        full_expr = body + Opt(trailing_undent)
        return full_expr.parseImpl(instring, anchor_loc, doActions)

4708 

4709 

class AtStringStart(ParseElementEnhance):
    """Match the wrapped expression only at location 0 of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression when ``loc`` is 0, otherwise
        raise :class:`ParseException`."""
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")

4729 

4730 

class AtLineStart(ParseElementEnhance):
    r"""Match the wrapped expression only when the parse location is at
    column 1 of a line within the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression when ``loc`` is in column 1,
        otherwise raise :class:`ParseException`."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")

4762 

4763 

class FollowedBy(ParseElementEnhance):
    """Lookahead for the given parse expression.

    ``FollowedBy`` does *not* advance the parsing position within the
    input string; it only verifies that the given expression matches at
    the current position. The returned token list is always empty, but
    results names defined in the lookahead expression *are* returned for
    access by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the lookahead at ``loc`` and return ``loc`` unchanged
        together with the emptied results."""
        # Parse with self.expr, then empty the list part of the returned
        # ParseResults; named results defined in the lookahead expression
        # are kept that way.
        lookahead_results = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead_results[:]

        return loc, lookahead_results

4798 

4799 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        # presumably expr() yields a copy of the expression - confirm against
        # ParserElement.__call__; whitespace skipping is then turned off on it
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # exact: True when the lookbehind distance is derived from the
        # expression itself, so parseImpl tries a single start position
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        # NOTE(review): for any other expression type ``retreat`` keeps the
        # caller's value; if that is None, the non-exact branch of parseImpl
        # does arithmetic on None - confirm whether that should be rejected
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # parse action that empties the token list (equivalent to ``del t[:]``)
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Check the lookbehind at ``loc``.

        Returns ``(loc, results)`` with ``loc`` unchanged; raises a parse
        exception when the lookbehind expression does not match.
        """
        if self.exact:
            # not enough text before loc to hold the lookbehind
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            # StringEnd() makes a candidate match end exactly at loc
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat) : loc]
            last_expr = ParseException(instring, loc, self.errmsg)
            # try start positions from nearest to loc outwards
            for offset in range(1, min(loc, self.retreat + 1) + 1):
                try:
                    _, ret = test_expr._parse(
                        instring_slice, len(instring_slice) - offset
                    )
                except ParseBaseException as pbe:
                    last_expr = pbe
                else:
                    break
            else:
                # no offset matched: re-raise the most recent failure
                raise last_expr
        return loc, ret

4880 

4881 

class Located(ParseElementEnhance):
    """
    Wrap the tokens of the contained expression together with the
    locations in the input string where the match starts and ends.

    The returned results carry these results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the contained expression at ``loc`` and return
        ``[start, tokens, end]`` with the three results names attached."""
        begin = loc
        end, matched = self.expr._parse(instring, begin, doActions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end

        # when this element has a results name, return a list so that the
        # name is attached to the complete group
        return end, ([located] if self.resultsName else located)

4922 

4923 

class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.

    ``NotAny`` does *not* advance the parsing position within the input
    string; it only verifies that the given expression does *not* match
    at the current position. ``NotAny`` also does *not* skip over leading
    whitespace, and it always returns a null token list. May be
    constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # Set the flag directly instead of calling self.leave_whitespace(),
        # which would also propagate to the contained expression.
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Raise :class:`ParseException` if the contained expression can
        parse at ``loc``; otherwise return ``loc`` and an empty list."""
        unwanted_found = self.expr.can_parse_next(instring, loc, do_actions=doActions)
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        """Default name: ``~{<expr>}``."""
        return f"~{{{str(self.expr)}}}"

4965 

4966 

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching of one expression, with
    an optional terminating sentinel (``stop_on``)."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        self.saveAsList = True
        # the stopOn() method promotes a string sentinel to a parser element
        self.stopOn(stopOn or stop_on)

    def stopOn(self, ender) -> ParserElement:
        """Set the terminating sentinel; ``None`` removes it.

        A string sentinel is promoted with ``_literalStringClass``. The
        sentinel is stored negated (``~ender``) in ``self.not_ender``.
        """
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the expression once (required), then as many more times
        as possible, stopping at the first failure or at the sentinel."""
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        not_ender = self.not_ender
        reject_ender = None if not_ender is None else not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions)

        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, next_loc, doActions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the repetition ends at the first failed match
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, first warning (when the diagnostic is
        enabled and not suppressed) about results names on the contained
        expression or on the elements it recurses to."""
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for candidate in [self.expr] + self.expr.recurse():
                if not isinstance(candidate, ParserElement):
                    continue
                if not candidate.resultsName:
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in candidate.suppress_warnings_
                ):
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        candidate.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

5042 

5043 

class OneOrMore(_MultipleMatch):
    """
    Match the given expression one or more times in a row.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        """Default name: ``{<expr>}...``."""
        return f"{{{str(self.expr)}}}..."

5074 

5075 

class ZeroOrMore(_MultipleMatch):
    """
    Match the given expression zero or more times in a row.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Run the one-or-more match; when even the first match fails,
        return ``loc`` unchanged with an empty, named result."""
        try:
            outcome = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        """Default name: ``[<expr>]...``."""
        return f"[{str(self.expr)}]..."

5108 

5109 

class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Define a delimited list of expressions; the delimiter defaults
        to ','. By default, the list elements and delimiters can have
        intervening whitespace and comments, but this can be overridden
        by passing ``combine=True`` in the constructor. With
        ``combine=True`` the matching tokens are returned as a single
        token string, with the delimiters included; otherwise they are
        returned as a list of tokens, with the delimiters suppressed.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Raises ``ValueError`` if ``min`` is less than 1, or if ``max`` is
        less than ``min``.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")
        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        # delimiters are kept in the output only when combining
        self.delim = delim if combine else Suppress(delim)
        self.combine = combine
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one leading item, then (min-1 .. max-1) "delimiter item" pairs
        fewest_pairs = self.min - 1
        most_pairs = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            fewest_pairs,
            most_pairs,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Default name: ``<content> [<delim> <content>]...``."""
        content = self.content.streamline()
        return f"{content} [{self.raw_delim} {content}]..."

5173 

5174 

class _NullToken:
    """Placeholder object that is falsy and converts to an empty string."""

    def __bool__(self):
        # instances always test as false
        return False

    def __str__(self):
        # instances always render as the empty string
        return str()

5181 

5182 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match at the current position, or be absent
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel marking "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression; when it fails, return ``loc``
        unchanged with the default value (if one was given) or with an
        empty token list."""
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif wrapped.resultsName:
                # attach the default under the wrapped expression's name
                tokens = ParseResults([fallback])
                tokens[wrapped.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Default name: ``[<expr>]``, with redundant enclosing braces of
        the inner name removed."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return f"[{inner}]"

5255 

5256 

# ``Optional`` is a second module-level name bound to the :class:`Opt` class.
# The typing construct of the same name is referenced in this module as
# ``typing.Optional``.
Optional = Opt

5258 

5259 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)
        # helper element that only carries the ignore expressions to skip
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's
        ignore expressions plus the ``ignore`` constructor argument."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register an ignorable expression and refresh the internal
        ignorer.

        Returns ``self`` so that calls can be chained, as the parent
        class's ``ignore`` does.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the location of the target (or the location after it when
        ``include`` was set) and the skipped text as results. Raises
        :class:`ParseException` when the end of the string is passed
        without the target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        fail_on_can_parse_next = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if fail_on_can_parse_next is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while-else failure
                # branch below, so the text skipped so far is still returned;
                # the class docstring describes a fail_on hit as "not a
                # match" - confirm which behavior is intended
                if fail_on_can_parse_next(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: parse actions are not run while scanning
                self_expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real, honoring doActions this time
            loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5410 

5411 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # Remember where this Forward was created so that __del__ can point its
        # "never assigned" warning at the defining line.
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' assignment; compared in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> "Forward":
        """Attach ``other`` as the contained expression and return ``self``.

        Strings are converted with ``_literalStringClass``; any other
        non-``ParserElement`` operand yields ``NotImplemented``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Drop the creation frame only once the operand is known to be valid.
        # Deleting it before the check left a rejected '<<' with expr still
        # None and no caller_frame, which made __del__ raise AttributeError.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        # mirror the parsing attributes of the newly attached expression
        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> "Forward":
        """In-place form of ``'<<'``; delegates to :meth:`__lshift__`."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> "ParserElement":
        """Build the ``|`` alternative, warning about ``fwd << a | b`` misuse.

        The warning is issued when the caller's stack entry equals the one
        recorded by the last ``'<<'`` and the diagnostic is enabled.
        """
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # A finalizer must not raise: skip the warning if the creation
            # frame is no longer available.
            caller_frame = getattr(self, "caller_frame", None)
            if caller_frame is None:
                return
            warnings.warn_explicit(
                "Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=caller_frame.filename,
                lineno=caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the contained expression at ``loc``.

        Warns (when the diagnostic is enabled) if no expression was ever
        attached.  When left recursion support is disabled this simply
        delegates to the parent implementation; otherwise the bounded
        recursion algorithm described below is used.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, doActions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `doActions=False` and only run `doActions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, doActions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `doActions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if doActions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match – do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if doActions:
                            # replace the match for doActions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, prev_result.copy()
                        del memo[peek_key]
                        return prev_loc, prev_peek.copy()
                    # the match did get better: see if we can improve further
                    else:
                        if doActions:
                            try:
                                memo[act_key] = super().parseImpl(instring, loc, True)
                            except ParseException as e:
                                memo[peek_key] = memo[act_key] = (new_loc, e)
                                raise
                        prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        # Only this element's own flag is changed; ``recursive`` is accepted
        # but not used here.
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        # Only this element's own flag is changed; ``recursive`` is accepted
        # but not used here.
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        # The flag is set before descending, so a grammar that refers back to
        # this Forward finds it already marked as streamlined.
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning`` and then validates the
        contained expression, skipping it if this element is already in
        ``validateTrace``."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        retString = "..."
        try:
            if self.expr is not None:
                retString = str(self.expr)[:1000]
            else:
                retString = "None"
        except Exception:
            # Best effort: keep the "..." placeholder if the contained
            # expression cannot be rendered.  Unlike a ``return`` inside
            # ``finally``, this lets KeyboardInterrupt/SystemExit propagate.
            pass
        return self.__class__.__name__ + ": " + retString

    def copy(self) -> ParserElement:
        """Return a copy; an unassigned ``Forward`` is copied as a new
        ``Forward`` that wraps this one."""
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False):
        # Optionally warn when a results name is set before any expression has
        # been attached, then defer to the parent implementation.
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward
            not in self.suppress_warnings_
        ):
            if self.expr is None:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "that has no contained expression".format(
                        "warn_name_set_on_empty_Forward", name, type(self).__name__
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on

5675 

5676 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but not forwarded to the parent constructor.
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

5685 

5686 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the legacy camelCase keyword wins whenever it is supplied
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        # An adjacent Combine registers the ignore expression through the
        # base ParserElement implementation directly; a non-adjacent one goes
        # through the regular inherited implementation.
        if not self.adjacent:
            super().ignore(other)
            return self
        ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # Start from a copy of the incoming results, empty its token list and
        # put the single joined string in its place.
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

5742 

5743 

class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    handy for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore`
    expressions.

    When the optional ``aslist`` argument is True, the tokens are returned
    as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # default behaviour: wrap the results in one more list level
        if not self._asPythonList:
            return [tokenlist]

        # aslist=True: convert to plain Python values first
        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

5779 

5780 

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also
    as a dictionary.  Each element can be looked up using the first token
    of its group as the key.  Useful for tabular report scraping when the
    first column can be used as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for position, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            # the first token of each entry is the key; ints become strings
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            # a bare key maps to the empty string
            if entry_len == 1:
                tokenlist[key] = _ParseResultsWithOffset("", position)
                continue

            # key plus one plain value maps to that value
            if entry_len == 2 and not isinstance(entry[1], ParseResults):
                tokenlist[key] = _ParseResultsWithOffset(entry[1], position)
                continue

            # otherwise the value is everything after the key
            try:
                remainder = entry.copy()  # ParseResults(i)
            except Exception:
                exc = TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                )
                raise exc from None

            del remainder[0]

            keep_whole = len(remainder) != 1 or (
                isinstance(remainder, ParseResults) and remainder.haskeys()
            )
            value = remainder if keep_whole else remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

5866 

5867 

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` is a placeholder that is resolved into a SkipTo
        # once the following expression is known (see __add__ / __sub__).
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # whatever was matched, contribute no tokens
        return []

    def suppress(self) -> ParserElement:
        # already suppressed - nothing more to wrap
        return self

5921 

5922 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each call of the decorated parse action writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``.  When the parse action finishes, a ``"<<leaving"``
    line follows, showing either the returned value or the exception that
    the parse action raised (the exception is re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*pa_args):
        label = f.__name__
        # the last three arguments are always (instring, loc, tokens)
        instring, loc, toks = pa_args[-3:]
        if len(pa_args) > 3:
            # an extra leading argument is the owning instance of a method
            label = paArgs_owner_name(pa_args) + "." + label
        sys.stderr.write(
            f">>entering {label}(line: {line(loc, instring)!r}, {loc}, {toks!r})\n"
        )
        try:
            result = f(*pa_args)
        except Exception as exc:
            sys.stderr.write(f"<<leaving {label} (exception: {exc})\n")
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {result!r})\n")
            return result

    def paArgs_owner_name(pa_args):
        # class name of the first positional argument
        return pa_args[0].__class__.__name__

    z.__name__ = f.__name__
    return z

5966 

5967 

# convenience constants for positional expressions
# Each is a module-level instance of the corresponding element class, given a
# display name equal to its variable name via set_name().
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

5974 

# Internal grammar for the "[...]" character-set syntax parsed by srange().

# backslash followed by one punctuation character -> that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# "\x41", "\X41" or "\0x41" -> the character with that hex code point.
# The digits are taken as everything after the first "x"/"X".  The previous
# ``lstrip(r"\0x")`` stripped a *set* of characters, so it failed on an
# upper-case "X" prefix and also ate leading zero digits (leaving "" for "\x00").
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# "\041" -> the character with that octal code point (the backslash is dropped)
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one of the escapes above, or a single character other than "\" and "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z": grouped pair of range endpoints, with the dash suppressed
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[" + optional "^" (results name "negate") + one or more ranges or single
# characters (results name "body") + "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

5994 

5995 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction.  The syntax is borrowed from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string.  If the
    input cannot be parsed or expanded, an empty string is returned.  The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def _expand(part):
        # single characters arrive as plain strings and pass through as-is
        if not isinstance(part, ParseResults):
            return part
        # a range arrives as a grouped (first, last) pair: expand inclusively
        first, last = part[0], part[1]
        return "".join(chr(code) for code in range(ord(first), ord(last) + 1))

    try:
        parsed = _reBracketExpr.parse_string(s)
        return "".join(map(_expand, parsed.body))
    except Exception:
        # best effort: any parse or expansion failure yields an empty set
        return ""

6031 

6032 

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. Any additional args are
    forwarded to the given function after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action takes its ``__name__`` from ``func`` (or from
    the class of ``func`` when it has no ``__name__`` of its own).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        # apply func (plus the extra args) to every token, in order
        return [func(token, *args) for token in t]

    # callables without a __name__ fall back to the name of their class
    fallback_name = func.__class__.__name__
    pa.__name__ = getattr(func, "__name__", fallback_name)

    return pa

6077 

6078 

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable of the calling frame that holds a
    :class:`ParserElement` without a custom name is named after the
    variable.
    """
    caller = sys._getframe().f_back
    if caller is None:
        return
    caller = typing.cast(types.FrameType, caller)
    for var_name, value in caller.f_locals.items():
        # skip anything that is not a parser element or is already named
        if not isinstance(value, ParserElement) or value.customName:
            continue
        value.set_name(var_name)

6091 

6092 

# Predefined quoted-string expressions.  Each one is a Combine of a Regex that
# matches the opening quote and the string body, followed by the closing quote.

# Double-quoted string: the body may contain any character other than a quote,
# newline, carriage return or backslash; a doubled quote (""); or a backslash
# escape (backslash + any non-"x" character, or backslash + "x" + hex digits).
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Same as above, using single quotes (a doubled '' is allowed in the body).
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above, tried with '|' (double-quoted form first).
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style strings: triple-quoted (either quote character) or single-line
# quoted, where an embedded quote is written as \" or \' rather than doubled.
# The four alternatives are combined with '^'.
# NOTE(review): re.MULTILINE only changes how ^ and $ behave, and these
# patterns use neither; the negated character classes already match newlines.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A quoted string (copy of quoted_string) prefixed with "u".
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# Character sets expanded by srange() from hex code point ranges:
# 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# 0xA1-0xBF plus the single code points 0xD7 and 0xF7
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6130 

# build list of built-in expressions, for future reference if a global default value
# gets updated
# Snapshot of every ParserElement bound at module level at this point in the
# file; names bound after this statement are not included.
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6136 

# backward compatibility names
# fmt: off
# camelCase aliases bound to the same objects as the snake_case names above
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end

# camelCase function synonyms: each stub body is just ``...`` and the
# ``replaced_by_pep8`` decorator is given the snake_case function it stands for
@replaced_by_pep8(null_debug_action)
def nullDebugAction(): ...

@replaced_by_pep8(trace_parse_action)
def traceParseAction(): ...

@replaced_by_pep8(condition_as_parse_action)
def conditionAsParseAction(): ...

@replaced_by_pep8(token_map)
def tokenMap(): ...
# fmt: on

6159# fmt: on