Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pip/_vendor/pyparsing/core.py: 49%

2584 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-02-26 06:33 +0000

1# 

2# core.py 

3# 

4 

5from collections import deque 

6import os 

7import typing 

8from typing import ( 

9 Any, 

10 Callable, 

11 Generator, 

12 List, 

13 NamedTuple, 

14 Sequence, 

15 Set, 

16 TextIO, 

17 Tuple, 

18 Union, 

19 cast, 

20) 

21from abc import ABC, abstractmethod 

22from enum import Enum 

23import string 

24import copy 

25import warnings 

26import re 

27import sys 

28from collections.abc import Iterable 

29import traceback 

30import types 

31from operator import itemgetter 

32from functools import wraps 

33from threading import RLock 

34from pathlib import Path 

35 

36from .util import ( 

37 _FifoCache, 

38 _UnboundedCache, 

39 __config_flags, 

40 _collapse_string_to_ranges, 

41 _escape_regex_range_chars, 

42 _bslash, 

43 _flatten, 

44 LRUMemo as _LRUMemo, 

45 UnboundedMemo as _UnboundedMemo, 

46 replaced_by_pep8, 

47) 

48from .exceptions import * 

49from .actions import * 

50from .results import ParseResults, _ParseResultsWithOffset 

51from .unicode import pyparsing_unicode 

52 

# largest value a native container index can hold; used as a sentinel upper bound
_MAX_INT = sys.maxsize
# types accepted wherever a string-like literal is expected
str_type: Tuple[type, ...] = (str, bytes)

55 

56# 

57# Copyright (c) 2003-2022 Paul T. McGuire 

58# 

59# Permission is hereby granted, free of charge, to any person obtaining 

60# a copy of this software and associated documentation files (the 

61# "Software"), to deal in the Software without restriction, including 

62# without limitation the rights to use, copy, modify, merge, publish, 

63# distribute, sublicense, and/or sell copies of the Software, and to 

64# permit persons to whom the Software is furnished to do so, subject to 

65# the following conditions: 

66# 

67# The above copyright notice and this permission notice shall be 

68# included in all copies or substantial portions of the Software. 

69# 

70# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

71# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

72# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

73# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

74# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

75# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

76# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

77# 

78 

79 

if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """
        Minimal stand-in for :class:`functools.cached_property` on
        interpreters older than Python 3.8.

        The wrapped function is called on first attribute access and its
        result is stored in the instance ``__dict__`` under the function's
        name, so later lookups find the stored value directly.
        """

        def __init__(self, func):
            self._func = func
            # expose the wrapped function's docstring on the descriptor
            self.__doc__ = func.__doc__

        def __get__(self, instance, owner=None):
            # class-level access (e.g. introspection) has no instance to
            # cache on; return the descriptor itself, as the stdlib does
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret

91 

92 

class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # label for this family of flags (how the base class uses it is not visible here)
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public (non-underscore) name defined so far in this class body;
    # must stay below the flag definitions because it snapshots locals()
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # flag names listed as "fixed"; presumably the base class refuses to change
    # them - confirm against __config_flags in .util
    _fixed_names = """
        collect_all_And_tokens
        """.split()

114 

115 

class __diag__(__config_flags):
    # Holder for the global diagnostic switches; each flag below starts disabled
    # and is toggled by name through the enable()/disable() methods that this
    # class calls on itself (inherited - their implementation is not visible here).
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # snapshot of the public flag names defined above; must follow the flag
    # definitions because it reads locals() of the class body
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # flags whose names start with "warn"
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    # flags whose names start with "enable_debug"
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag listed in ``_warning_names`` (debug flags are left untouched)."""
        for name in cls._warning_names:
            cls.enable(name)

136 

137 

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :func:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - not described in the original text; judging
      by its name it presumably warns when ``'<<'`` is combined with a :class:`MatchFirst`
      (``'|'``) expression (to be confirmed where the flag is checked)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :meth:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.
    """

    # member names are used as the flag names passed to __diag__.enable()/disable()
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

170 

171 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Enable a global pyparsing diagnostic flag (see :class:`Diagnostics`).

    The enum member's *name* selects the flag that is switched on in ``__diag__``.
    """
    __diag__.enable(diag_enum.name)

177 

178 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Disable a global pyparsing diagnostic flag (see :class:`Diagnostics`).

    The enum member's *name* selects the flag that is switched off in ``__diag__``.
    """
    __diag__.disable(diag_enum.name)

184 

185 

def enable_all_warnings() -> None:
    """
    Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).

    Delegates to ``__diag__.enable_all_warnings()``.
    """
    __diag__.enable_all_warnings()

191 

192 

# hide abstract class: drop the imported base from this module's namespace now
# that the concrete flag classes that subclass it have been created
del __config_flags

195 

196 

def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether pyparsing's diagnostic warnings should be switched on.

    The starting answer is the truthiness of ``warn_env_var``. Each entry of
    ``cmd_line_warn_options`` is then read as a colon-separated warning filter
    (``action:message:category:module:line``) and applied in order, so a later
    option overrides an earlier one:

    - an action that does not start with ``"i"`` turns warnings on when the
      filter names the ``pyparsing`` module, or names no message, category or
      module at all
    - an action starting with ``"i"`` (e.g. ``ignore``) turns warnings off when
      the module field is ``pyparsing`` or empty
    - any other option leaves the current answer unchanged
    """
    enabled = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with separators so that short options still yield every field
        fields = (option + "::::").split(":")[:5]
        action, message, category, module = fields[:4]
        if action.lower().startswith("i"):
            if module in ("pyparsing", ""):
                enabled = False
        elif module == "pyparsing" or not (message or category or module):
            enabled = True
    return enabled

212 

213 

# at import time, turn on all diagnostic warnings if the interpreter's -W options
# or the PYPARSINGENABLEALLWARNINGS environment variable ask for them
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

218 

219 

# build list of single arg builtins, that can be used as parse actions
# (_trim_arity checks membership here and, for these, calls func(tokens) directly
# instead of probing the callable's arity)
_single_arg_builtins = {
    sum,
    len,
    sorted,
    reversed,
    list,
    tuple,
    set,
    any,
    all,
    min,
    max,
}

234 

_generatorType = types.GeneratorType
# (next location, tokens) pair, the shape returned by parseImpl
ParseImplReturnType = Tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]
# a parse action may accept (), (toks), (loc, toks) or (s, loc, toks)
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# same call shapes as ParseAction, but returning a boolean pass/fail
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# called as fn(s, loc, expr, err) when an expression fails to match
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug hooks; the trailing bool is the cache-hit indicator
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

256 

257 

# character-set constants, as plain strings of the member characters
alphas = string.ascii_uppercase + string.ascii_lowercase
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# every character of string.printable that is not whitespace
printables = "".join([c for c in string.printable if c not in string.whitespace])

# stack frame recorded by the first call to _trim_arity and reused afterwards;
# it holds a single frame (extract_stack(...)[-1]), hence FrameSummary
_trim_arity_call_line: typing.Optional[traceback.FrameSummary] = None

267 

268 

def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is always called with the full argument list and
    forwards only as many trailing arguments as ``func`` accepts. The number of
    leading arguments to drop is found on the first calls: the wrapper tries
    ``func(*args[limit:])`` and, each time that call itself raises ``TypeError``,
    drops one more leading argument, up to ``max_limit``. Once a call succeeds
    the arity is considered found and later ``TypeError`` exceptions propagate
    unchanged.

    Callables listed in ``_single_arg_builtins`` skip the probing and are
    wrapped so that only the third argument is passed.

    The wrapper's ``__name__`` and ``__doc__`` are copied from ``func``.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while 1:
            try:
                ret = func(*args[limit:])
                # a successful call fixes the arity for all later calls
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # compare the (filename, lineno) of the innermost of the first
                    # two traceback frames with the synthesized call location: a
                    # match means the call itself failed (wrong argument count),
                    # not code inside func
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            # retry with one fewer leading argument
                            limit += 1
                            continue

                    raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

324 

325 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate so that it can be installed as a parse action.

    The returned parse action calls ``fn`` and raises a parse exception when
    the result is falsy; a truthy result lets parsing continue. This is useful
    wherever a parse action is expected but :class:`ParserElement.add_condition`
    is not available (for example on an operator level of :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - text of the raised exception; when ``None`` a generic
      "failed user-defined condition" message is used
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise raise :class:`ParseException`

    """
    if message is None:
        failure_text = "failed user-defined condition"
    else:
        failure_text = message

    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException

    # normalize the predicate so it can always be called as fn(s, l, t)
    fn = _trim_arity(fn)

    @wraps(fn)
    def check_condition(s, l, t):
        passed = bool(fn(s, l, t))
        if not passed:
            raise failure_type(s, l, failure_text)

    return check_condition

352 

353 

def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Default debug hook run when a match attempt begins.

    Prints the expression, the location (with its line and column), the source
    line, and a caret under the column being tried. Output is prefixed with
    ``*`` when ``cache_hit`` is true.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report = (
        f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
        f" {line(loc, instring)}\n"
        f" {' ' * (col(loc, instring) - 1)}^"
    )
    print(report)

365 

366 

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default debug hook run after a successful match.

    Prints the expression and the matched tokens as a list, prefixed with
    ``*`` when ``cache_hit`` is true. The string and locations are accepted
    for signature compatibility but not used.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report = f"{cache_hit_str}Matched {expr} -> {toks.as_list()}"
    print(report)

377 

378 

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: "ParserElement",
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default debug hook run when a match attempt raises.

    Prints the expression together with the exception's type name and message,
    prefixed with ``*`` when ``cache_hit`` is true. The string and location are
    accepted for signature compatibility but not used.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    report = f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}"
    print(report)

388 

389 

def null_debug_action(*args):
    """Debug action that ignores its arguments and does nothing; use it to silence debugging output during parsing."""
    return None

392 

393 

394class ParserElement(ABC): 

395 """Abstract base level parser element class.""" 

396 

397 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

398 verbose_stacktrace: bool = False 

399 _literalStringClass: type = None # type: ignore[assignment] 

400 

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the set of characters that new expressions skip as whitespace.

    The new value is stored on :class:`ParserElement` and is also pushed to
    every expression in this module's list of built-in expressions that still
    follows the default.

    Example::

        # by default, space, <TAB>, newline and carriage return are skipped
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # skip only space and <TAB>, so that newlines become significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # refresh the built-in expressions that track the default whitespace
    followers = (builtin for builtin in _builtin_exprs if builtin.copyDefaultWhiteChars)
    for builtin in followers:
        builtin.whiteChars = set(chars)

421 

@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Set class to be used for inclusion of string literals into a parser.

    The class is stored on :class:`ParserElement` as ``_literalStringClass``.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    ParserElement._literalStringClass = cls

443 

@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Lazily build one instance of this class per item of ``seq``, calling
    ``cls(item, **class_kwargs)`` for each item in order.

    Example::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    """
    for item in seq:
        yield cls(item, **class_kwargs)

455 

class DebugActions(NamedTuple):
    # Triple of optional debug callbacks carried by each expression
    # (initialised to three ``None`` values in ``__init__``).
    # called before a match attempt
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match
    debug_match: typing.Optional[DebugSuccessAction]
    # called when the match attempt or a parse action raises
    debug_fail: typing.Optional[DebugExceptionAction]

460 

def __init__(self, savelist: bool = False):
    """
    Set every expression attribute to its default.

    ``savelist`` is stored as ``saveAsList``; it is passed as ``asList`` when
    parse results are built for this expression.
    """
    # naming
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    # mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True
    self.saveAsList = savelist

    # actions run on success / failure
    self.parseAction: List[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    self.callDuringTry = False

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs: List["ParserElement"] = []
    # avoid redundant calls to preParse
    self.callPreparse = True

    # parsing behaviour flags
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.streamlined = False
    self.errmsg = ""

    # debugging and diagnostics
    self.debug = False
    # custom debug actions
    self.debugActions = self.DebugActions(None, None, None)
    self.suppress_warnings_: List[Diagnostics] = []

488 

def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
    """
    Suppress warnings emitted for a particular diagnostic on this expression.

    The diagnostic is appended to this expression's ``suppress_warnings_`` list
    and the expression itself is returned, so calls can be chained.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parse_string("x"))

    """
    self.suppress_warnings_.append(warning_type)
    return self

504 

def visit_all(self):
    """Yield this expression and every sub-expression reachable from it,
    breadth-first, each one exactly once. Typically just for internal use.
    """
    pending = deque([self])
    visited = set()
    while pending:
        node = pending.popleft()

        # recursive grammars can reach the same expression again; only
        # expand and report an expression the first time it is met
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node

521 

def copy(self) -> "ParserElement":
    """
    Return a shallow copy of this :class:`ParserElement`.

    The copy gets its own parse-action and ignore-expression lists, so that
    actions added to it do not affect the original. If the expression follows
    the default whitespace characters, the copy is given the current default.
    This is handy for attaching different parse actions to the same pattern.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    ``expr()`` is shorthand for ``expr.copy()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # the shallow copy still shares these lists with the original; give the
    # duplicate independent ones
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate

550 

def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> "ParserElement":
    """
    Attach a results name, under which the matched tokens can be looked up
    as a named attribute of the returned parse results.

    Results names behave like dict keys by default: a later match under the
    same name replaces the earlier value. Pass ``list_all_matches`` = True
    to keep every value captured under the name instead.

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object,
    so that one basic element, such as an integer, can be reused in several
    places under different names.

    The short form ``expr("name")`` is equivalent to
    ``expr.set_results_name("name")`` - see :class:`__call__`. For the
    ``list_all_matches`` behaviour, use ``expr("name*")``.

    Example::

        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # either spelling of the flag may be used
    return self._setResultsName(name, listAllMatches or list_all_matches)

583 

def _setResultsName(self, name, listAllMatches=False):
    """
    Return a copy of this expression carrying the results name ``name``.

    ``None`` leaves the expression unnamed and returns ``self`` unchanged.
    A trailing ``*`` is stripped from the name and has the same effect as
    passing ``listAllMatches=True``.
    """
    if name is None:
        return self

    named = self.copy()
    keep_all = listAllMatches
    if name.endswith("*"):
        name, keep_all = name[:-1], True
    named.resultsName = name
    # modal results keep only the last match
    named.modalResults = not keep_all
    return named

594 

def set_break(self, break_flag: bool = True) -> "ParserElement":
    """
    Enable or disable dropping into the Python pdb debugger just before
    this element is parsed. Pass ``break_flag`` as ``True`` to enable and
    ``False`` to disable. Returns this element.
    """
    if not break_flag:
        # restore the parse method that was saved when the break was installed
        current = self._parse
        if hasattr(current, "_originalParseMethod"):
            self._parse = current._originalParseMethod  # type: ignore [attr-defined, assignment]
        return self

    wrapped_parse = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb

        # this call to pdb.set_trace() is intentional, not a checkin error
        pdb.set_trace()
        return wrapped_parse(instring, loc, doActions, callPreParse)

    # remember the wrapped method so that set_break(False) can undo this
    breaker._originalParseMethod = wrapped_parse  # type: ignore [attr-defined]
    self._parse = breaker  # type: ignore [assignment]
    return self

617 

def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Replace this expression's parse actions with the given callables.

    Parse actions run after the expression has matched. They can convert data,
    perform extra validation, update external data structures, or enhance or
    replace the parsed tokens. Each ``fn`` is a callable taking 0-3 arguments,
    called as ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or
    just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens arrive as a :class:`ParseResults`. A parse action may change them in
    place - list-style ``append``, ``extend`` and ``pop`` for the token list, and
    dict-style item assignment and ``del`` for named results - in which case
    nothing needs to be returned.

    A parse action may instead return a replacement: another ``ParseResults``,
    or any other object (typical for data conversions). One convenient way to
    build a new result is to collect the values in a dict and return
    :class:`ParseResults.from_dict` of it.

    Passing ``None`` as the only ``fn`` removes all parse actions previously
    set on this expression. Passing anything else that is not callable raises
    ``TypeError``.

    Optional keyword arguments:

    - ``call_during_try`` = (default= ``False``) whether the parse actions should also run
      during lookaheads and alternate testing. Actions with side effects should
      only run once the match is known to be part of a successful parse, so
      leave this off for them. Actions that perform additional validation
      should pass True, so that the validation takes part in the preliminary
      "try" parses.

    Note: by default, tabs in the input string are expanded before parsing
    starts. See :class:`parse_string` for more information on parsing strings
    that contain ``<TAB>`` s, and on ways to keep the parsed string, the parse
    location, and the line and column positions consistent.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None means "clear all parse actions"
    if fns == (None,):
        self.parseAction = []
        return self

    if any(not callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")

    self.parseAction = list(map(_trim_arity, fns))
    # the camelCase keyword is honoured only when the snake_case one is absent
    legacy_setting = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_setting)
    return self

703 

def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Append one or more parse actions to this expression's existing parse
    actions. See :class:`set_parse_action` for the accepted call signatures
    and the ``call_during_try`` keyword; once enabled, ``call_during_try``
    stays enabled.

    See examples in :class:`copy`.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction += wrapped
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get(
            "call_during_try", kwargs.get("callDuringTry", False)
        )
    return self

715 

def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception; when omitted
      (or ``None``), the default message of :func:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    message = kwargs.get("message")
    if message is not None:
        # keep the original coercion for caller-supplied messages, but do not
        # turn a missing message into the literal text "None"
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self

752 

def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
    """
    Define action to perform if parsing fails at this expression.
    Fail action fn is a callable function that takes the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value.  It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns this expression, so calls can be chained."""
    self.failAction = fn
    return self

768 

def _skipIgnorables(self, instring: str, loc: int) -> int:
    """
    Advance ``loc`` past every leading match of this expression's ignore
    expressions and return the new location.

    The ignore expressions are tried repeatedly, each one as many times in a
    row as it matches, until a full pass over all of them matches nothing.
    """
    if not self.ignoreExprs:
        return loc

    skippers = [ignorable._parse for ignorable in self.ignoreExprs]
    while True:
        advanced = False
        for skip in skippers:
            try:
                # keep consuming until this ignore expression stops matching
                while True:
                    loc, _ = skip(instring, loc)
                    advanced = True
            except ParseException:
                pass
        if not advanced:
            break
    return loc

784 

def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should start: ``loc`` moved past
    any ignorable expressions and then, if whitespace skipping is enabled,
    past any of this expression's whitespace characters.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc

796 

def parseImpl(self, instring, loc, doActions=True):
    """Base matching step: consume nothing and return ``loc`` unchanged with an empty token list."""
    return loc, []

799 

def postParse(self, instring, loc, tokenlist):
    """Base post-processing step: return ``tokenlist`` unchanged."""
    return tokenlist

802 

803 # @profile 

def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Match this expression against ``instring`` at ``loc`` without using the
    parse cache, and return ``(next location, results)``.

    Steps, in order: optional ``preParse``, ``parseImpl``, ``postParse``,
    wrapping of the tokens in a :class:`ParseResults`, then the parse actions
    (only when ``doActions`` is true or ``callDuringTry`` is set). A parse
    action returning a value other than ``None`` or the results object it was
    given replaces the results.

    When debugging is on or a fail action is set, the match is run inside a
    handler that reports failures to ``debugActions.debug_fail`` and to
    ``failAction`` before re-raising; otherwise the same steps run without
    that handler.

    An ``IndexError`` raised by ``parseImpl`` is converted to a
    :class:`ParseException` at the end of the string; an ``IndexError`` raised
    by a parse action is converted to a chained :class:`ParseException`.
    """
    debugging = self.debug  # and doActions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        # give the failure handlers a location to report even if preParse
        # itself raises, instead of masking the error with UnboundLocalError
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # same steps as above, without the failure-reporting handler
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a returned replacement becomes the new results object
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a returned replacement becomes the new results object
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

905 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and report only where the match ends.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param raise_fatal: if true, a :class:`ParseFatalException` is propagated unchanged;
        otherwise it is replaced by a plain :class:`ParseException` built from this
        expression's ``errmsg``
    :param do_actions: forwarded to ``_parse`` as ``doActions``
    :returns: the first item of the ``_parse`` result (the location after the match)
    """
    try:
        outcome = self._parse(instring, loc, doActions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            # downgrade the fatal error to an ordinary parse failure at ``loc``
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return outcome[0]

920 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    Runs :meth:`try_parse` and converts its outcome to a boolean: a
    :class:`ParseException` or ``IndexError`` means "no match"; any other
    exception propagates to the caller.
    """
    matched = True
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        matched = False
    return matched

928 

# cache for left-recursion in Forward references
# NOTE(review): recursion_lock is not used anywhere in this part of the file; presumably it
# guards recursion_memos inside Forward's parse logic - confirm against Forward.
recursion_lock = RLock()
# Starts out as a plain dict; enable_left_recursion() replaces it with an _UnboundedMemo or
# _LRUMemo instance, and reset_cache() empties it.
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
] = {}

934 

class _CacheType(dict):
    """
    Placeholder cache type, present mainly to help type checking.

    It declares the members that ``_parseCache`` uses on the packrat cache
    (``get``, ``set`` and ``not_in_cache``). The methods here are empty stubs;
    ``enable_packrat`` installs a real cache object in place of an instance of
    this class.
    """

    # value that ``_parseCache`` compares (by identity) against the result of ``get``
    # to detect a cache miss; only annotated here, never assigned
    not_in_cache: bool

    def get(self, *args):
        # stub: no lookup is performed, returns None
        ...

    def set(self, *args):
        # stub: nothing is stored, returns None
        ...

947 

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = (
    _CacheType()
)  # set later by enable_packrat(); this is here so that reset_cache() doesn't fail
# held by _parseCache for the whole lookup / parse / store sequence
packrat_cache_lock = RLock()
# two counters, indexed by _parseCache as [hits, misses]; zeroed in place by reset_cache()
packrat_cache_stats = [0, 0]

954 

# Backtracking calls this method over and over with identical arguments, so the outcome
# of the first attempt is remembered and replayed instead of re-parsing the expression.
def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Memoizing front end for ``_parseNoCache`` (installed as ``_parse`` by ``enable_packrat``).

    The cache key is ``(self, instring, loc, callPreParse, doActions)``. On a miss the
    expression is parsed and either a traceback-free copy of the raised
    ``ParseBaseException`` or the tuple ``(end_loc, tokens_copy, loc)`` is stored.
    On a hit the stored exception is re-raised, or a fresh copy of the stored
    tokens is returned. Debug actions, when enabled, are invoked with
    ``cache_hit=True``; a ``TypeError`` from a debug action is ignored.

    :returns: ``(location after the match, ParseResults)``
    :raises ParseBaseException: when the (possibly cached) parse attempt failed
    """
    hit_slot, miss_slot = 0, 1
    key = (self, instring, loc, callPreParse, doActions)

    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is cache.not_in_cache:
            # --- miss: do the real work and remember how it ended ---
            ParserElement.packrat_cache_stats[miss_slot] += 1
            try:
                fresh = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(key, pe.__class__(*pe.args))
                raise
            cache.set(key, (fresh[0], fresh[1].copy(), loc))
            return fresh

        # --- hit: replay the remembered outcome ---
        ParserElement.packrat_cache_stats[hit_slot] += 1
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(cached, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, cached, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise cached

        loc_ = cached[0]
        # hand out a copy so callers cannot alter the cached tokens
        result = cached[1].copy()
        endloc = cached[2]
        if self.debug and self.debugActions.debug_match:
            try:
                self.debugActions.debug_match(
                    instring, loc_, endloc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return loc_, result

1005 

# default parse entry point is the uncached implementation; enable_packrat() rebinds this
# to _parseCache and disable_memoization() restores it
_parse = _parseNoCache

1007 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache and the left-recursion memo table, and zero the
    packrat statistics counters in place.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so the same list object is kept
    stats[:] = [0 for _ in stats]
    ParserElement.recursion_memos.clear()

1015 

# class-wide switches for the two mutually exclusive memoization modes; set by
# enable_packrat() / enable_left_recursion() and cleared by disable_memoization()
_packratEnabled = False
_left_recursion_enabled = False

1018 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing and discard their memoized data.

    Calling this when neither mode is enabled is harmless, so it can be used
    before activating Packrat or Left Recursion to clear any previous settings.
    """
    ParserElement.reset_cache()
    # clear both mode flags, then route parsing back through the uncached implementation
    ParserElement._left_recursion_enabled = False
    ParserElement._packratEnabled = False
    ParserElement._parse = ParserElement._parseNoCache

1032 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and indirect
    left-recursion. While parsing, a left-recursive :class:`Forward` element is
    matched repeatedly with a fixed recursion depth that is raised step by step
    until the longest match has been found.

    Example::

        from pip._vendor import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-run during backtracking. Programs whose parse actions
    depend on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; ``None`` memoizes all of them.
    - ``force`` - (default=``False``) - call :meth:`disable_memoization` first,
      dropping any previous (possibly conflicting) setting.

    Bounded Recursion parsing is similar to, but not the same as, Packrat parsing,
    and the two cannot be combined.

    :raises RuntimeError: if Packrat parsing is enabled and ``force`` is not set
    :raises NotImplementedError: if ``cache_size_limit`` is not ``None`` and not positive
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    else:
        if not cache_size_limit > 0:
            raise NotImplementedError("Memo size of %s" % cache_size_limit)
        memo = _LRUMemo(capacity=cache_size_limit)

    ParserElement.recursion_memos = memo  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True

1080 

@staticmethod
def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None:
    """
    Turn on "packrat" parsing, i.e. memoization of parse attempts.
    Grammars often retry the same expression at the same string location; with
    packrat enabled such retries return the cached outcome instead of running
    the parsing/validating code again. Both successful results and parsing
    exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the size of
      the packrat cache; ``None`` makes the cache unbounded; ``0`` effectively
      disables the cache.
    - ``force`` - (default= ``False``) - call :meth:`disable_memoization` first,
      dropping any previous (possibly conflicting) setting.

    Because cached results skip re-execution, programs whose parse actions have
    side-effects may break. For that reason packrat parsing is off when
    pyparsing is first imported and must be activated explicitly, preferably
    right after the import.

    Example::

        from pip._vendor import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar to, but not the same as, Bounded Recursion
    parsing, and the two cannot be combined. If packrat is already enabled,
    the existing cache is kept and ``cache_size_limit`` is ignored.

    :raises RuntimeError: if Bounded Recursion is enabled and ``force`` is not set
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if ParserElement._packratEnabled:
        # already active: leave the current cache in place
        return

    ParserElement._packratEnabled = True
    ParserElement.packrat_cache = (
        _UnboundedCache() if cache_size_limit is None else _FifoCache(cache_size_limit)  # type: ignore[assignment]
    )
    ParserElement._parse = ParserElement._parseCache

1124 

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` from its beginning using this parser definition. This is the
    primary entry point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if the input does not match, or if ``parse_all`` is set
       and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a
       `list`, a `dict`, or an object with attributes if the parser includes results names.

    Setting ``parse_all`` has the same effect as ending the grammar with a
    :class:`StringEnd` expression.

    Unless ``parse_with_tabs`` was called on the grammar, parsing operates on a copy of the
    input in which tabs are expanded to spaces (``str.expandtabs`` defaults), so that
    column numbers are reported properly. If the input contains tabs and parse actions
    index into the string using ``loc``, keep both views consistent by doing one of:

    - calling ``parse_with_tabs`` on the grammar before calling ``parse_string``,
    - writing parse actions with the full ``(s,loc,toks)`` signature and indexing into
      the ``s`` argument, or
    - expanding the tabs in the input yourself before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    With ``parse_all`` set, leftover input raises an exception.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b'  (at char 5), (line:1, col:6)
    """
    must_consume_all = parse_all or parseAll

    # every top-level parse starts from empty memo tables
    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if must_consume_all:
            # skip trailing whitespace/ignorables, then insist on end of input
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, end_loc)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clearing out pyparsing's internal stack trace
            raise exc.with_traceback(None)
        raise
    return tokens

1193 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match yields the
    matching tokens, start location, and end location. The optional
    ``max_matches`` argument stops scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    :param instring: the string to scan
    :param max_matches: maximum number of matches to yield
    :param overlap: if true, resume scanning inside the previous match rather than after it
    :param debug: if true, print a dict with tokens/start/end for every match
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of the two is used
    :raises ParseBaseException: exceptions other than :class:`ParseException` raised while
        matching are propagated (with the internal traceback removed unless
        ``ParserElement.verbose_stacktrace`` is set)

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every scan position
    preparseFn = self.preParse
    parseFn = self._parse
    # use the PEP8 name, as parse_string() does, rather than the legacy resetCache alias
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character past the pre-parsed position
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # if pre-parsing from ``loc`` skips anything, continue after the
                        # match; otherwise step forward a single character
                        skipped_to = preparseFn(instring, loc)
                        if skipped_to > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past ``loc``: move on to avoid looping forever
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)

1284 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Built on :class:`scan_string`: rewrite the matching parts of a string using the
    (possibly modified) tokens returned for each match. Define a grammar, attach a
    parse action that changes the returned token list, then call
    ``transform_string()`` on the target text. Text between matches is copied
    unchanged; matched text is replaced by the match's tokens.

    Note: this sets ``keepTabs`` on the expression, and leaves it set.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    copied_upto = 0
    # keep tab characters as they are, both to avoid unwanted changes to the text and so
    # that locations reported by scan_string index correctly into ``instring``
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # untouched text between the previous match and this one
            pieces.append(instring[copied_upto:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.as_list())
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            copied_upto = end
        pieces.append(instring[copied_upto:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(str, _flatten(non_empty)))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clearing out pyparsing's internal stack trace
            raise exc.with_traceback(None)
        raise

1330 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Convenience wrapper over :class:`scan_string` that collects just the tokens of
    every match into a single :class:`ParseResults`. The optional ``max_matches``
    argument stops the search after 'n' matches (``maxMatches`` is the pre-PEP8
    spelling; the smaller of the two values is used).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            toks for toks, _start, _end in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clearing out pyparsing's internal stack trace
            raise exc.with_traceback(None)
        raise

1370 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, treating every match of this expression as a
    separator. ``maxsplit`` limits the number of splits. When ``include_separators``
    (default= ``False``; ``includeSeparators`` is the pre-PEP8 spelling) is true, the
    first token of each separator match is yielded between the pieces.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    resume_at = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        # text between the previous separator and this one
        yield instring[resume_at:start]
        if emit_separators:
            yield toks[0]
        resume_at = end
    # whatever follows the final separator (possibly empty)
    yield instring[resume_at:]

1402 

def __add__(self, other) -> "ParserElement":
    """
    Implements the ``+`` operator, producing an :class:`And`. A string operand is
    first converted with this element's literal string class (:class:`Literal`
    by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    The skipped text is returned under the results name '_skipped'; so that a
    parser may contain several skips, the value is a list of all skipped text.
    """
    if other is Ellipsis:
        # the skip target is not known yet; it is supplied by the next operand
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1438 

def __radd__(self, other) -> "ParserElement":
    """
    Implements ``+`` when the left operand is not a :class:`ParserElement`.
    ``... + expr`` skips ahead to ``expr`` (skipped text is reported as
    ``_skipped``); a string on the left is converted with the literal string class.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1451 

def __sub__(self, other) -> "ParserElement":
    """
    Implements the ``-`` operator: an :class:`And` of both operands with an
    error stop inserted between them. A string operand is converted with the
    literal string class.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1461 

def __rsub__(self, other) -> "ParserElement":
    """
    Implements ``-`` when the left operand is not a :class:`ParserElement`;
    a string on the left is converted with the literal string class.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1471 

def __mul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` (or ``...``) as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Returns ``NotImplemented`` for any other kind of operand, and raises
    ``ValueError`` for a negative count or a maximum smaller than the minimum.
    """
    # normalize the Ellipsis forms: ``...`` -> (0, None); (..., n) -> (0, n)
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        # plain count: exactly ``other`` required repetitions, none optional
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # any remaining Ellipsis means "unbounded", same as None
        other = tuple(o if o is not Ellipsis else None for o in other)
        # pad with None and clip, so ``other`` is always a 2-tuple
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            # open-ended repetition
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements counts only the optional extras beyond the minimum
            optElements -= minElements
        else:
            return NotImplemented
    else:
        return NotImplemented

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if minElements == optElements == 0:
        return And([])

    if optElements:

        def makeOptionalList(n):
            # builds nested optionals: Opt(self + Opt(self + ... Opt(self))), n levels deep
            if n > 1:
                return Opt(self + makeOptionalList(n - 1))
            else:
                return Opt(self)

        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret

1550 

def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator when the left operand is not a
    :class:`ParserElement`; delegates directly to :meth:`__mul__`.
    """
    return self.__mul__(other)

1553 

def __or__(self, other) -> "ParserElement":
    """
    Implements the ``|`` operator, producing a :class:`MatchFirst`.

    Special operands: ``expr | ...`` yields a pending skip with ``must_skip``
    set; ``expr | ""`` yields ``Opt(expr)``; any other string is converted with
    the literal string class.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    alternative = other
    if isinstance(alternative, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if alternative == "":
            return Opt(self)
        alternative = self._literalStringClass(alternative)

    if isinstance(alternative, ParserElement):
        return MatchFirst([self, alternative])
    return NotImplemented

1569 

def __ror__(self, other) -> "ParserElement":
    """
    Implements ``|`` when the left operand is not a :class:`ParserElement`;
    a string on the left is converted with the literal string class.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1579 

def __xor__(self, other) -> "ParserElement":
    """
    Implements the ``^`` operator, producing an :class:`Or`. A string operand
    is converted with the literal string class.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1589 

def __rxor__(self, other) -> "ParserElement":
    """
    Implements ``^`` when the left operand is not a :class:`ParserElement`;
    a string on the left is converted with the literal string class.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1599 

def __and__(self, other) -> "ParserElement":
    """
    Implements the ``&`` operator, producing an :class:`Each`. A string operand
    is converted with the literal string class.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1609 

def __rand__(self, other) -> "ParserElement":
    """
    Implements ``&`` when the left operand is not a :class:`ParserElement`;
    a string on the left is converted with the literal string class.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1619 

def __invert__(self) -> "ParserElement":
    """
    Implements the ``~`` operator: wraps this expression in a :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1625 

# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence; without this, defining __getitem__ would make instances
# iterable through Python's legacy sequence protocol
__iter__ = None

1629 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
         to ``expr*n + ZeroOrMore(expr)``
         (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
         (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` if more than two index arguments are given.
    """

    stop_on_defined = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # ``expr[count: end_expr]`` - slice start is the repetition, slice stop the stop_on
        key, stop_on = key.start, key.stop
        if key is None:
            key = ...
        stop_on_defined = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # ``expr[m, n: end_expr]`` - the slice is the second item of the tuple
        key, stop_on = (key[0], key[1].start), key[1].stop
        stop_on_defined = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # non-iterable (e.g. an int n) becomes (n, n), i.e. exactly n repetitions
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    ret = self * tuple(key[:2])
    # cast is for the type checker only; it does not convert or validate ``ret``
    ret = typing.cast(_MultipleMatch, ret)

    if stop_on_defined:
        ret.stopOn(stop_on)

    return ret

1689 

def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Short form of :class:`set_results_name`, with ``list_all_matches=False``.

    A ``name`` ending in ``'*'`` requests ``list_all_matches=True`` instead.

    Called without a ``name``, this is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)

1709 

def suppress(self) -> "ParserElement":
    """
    Return this :class:`ParserElement` wrapped in a :class:`Suppress`, so that its
    output is omitted; handy for keeping punctuation out of the returned results.
    """
    quiet = Suppress(self)
    return quiet

1716 

def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on skipping of whitespace before this :class:`ParserElement`'s pattern
    is matched.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in
        child elements (if any). This implementation has no children to visit
        and does not consult the flag.
    :returns: ``self``, so calls can be chained
    """
    self.skipWhitespace = True
    return self

1726 

def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off skipping of whitespace before this :class:`ParserElement`'s pattern
    is matched. Mostly used internally by pyparsing, but some
    whitespace-sensitive grammars need it too.

    :param recursive: If true (the default), also disable whitespace skipping in
        child elements (if any). This implementation has no children to visit
        and does not consult the flag.
    :returns: ``self``, so calls can be chained
    """
    self.skipWhitespace = False
    return self

1737 

def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Replace the default whitespace characters for this element.

    :param chars: the characters to treat as whitespace (stored as a ``set``)
    :param copy_defaults: stored as ``copyDefaultWhiteChars``
    :returns: ``self``, so calls can be chained

    Whitespace skipping is switched on as a side effect.
    """
    self.skipWhitespace = True
    white = set(chars)
    self.whiteChars = white
    self.copyDefaultWhiteChars = copy_defaults
    return self

1748 

def parse_with_tabs(self) -> "ParserElement":
    """
    Set the ``keepTabs`` flag on this expression, overriding the default
    behavior of expanding ``<TAB>`` s to spaces before parsing the input string.
    Must be called before ``parse_string`` when the input grammar contains
    elements that match ``<TAB>`` characters.

    Returns ``self`` so that calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self

1757 

def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    ``other`` may be a string (wrapped in :class:`Suppress`), an existing
    :class:`Suppress` (appended only if not already in ``ignoreExprs``), or any
    other expression (a copy of it is wrapped in :class:`Suppress` and appended).

    Returns ``self`` so that calls can be chained.

    Example::

        patt = Word(alphas)[1, ...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if isinstance(other, Suppress):
        # an already-suppressed expression is registered at most once
        if other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
    else:
        # wrap a copy so the caller's expression is left untouched
        self.ignoreExprs.append(Suppress(other.copy()))
    return self

1785 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching:

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

    Any action passed as a falsy value is replaced by the corresponding
    module default action. Debugging is switched on as a side effect, and
    ``self`` is returned so that calls can be chained.
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = True
    return self

1811 

def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to apply the flag to every expression yielded by
    ``visit_all()`` (each one is called with ``recurse=False``).

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if recurse:
        # delegate to each visited expression, one level at a time
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
    elif flag:
        # installing the default actions also switches debugging on
        self.set_debug_actions(
            _default_start_debug_action,
            _default_success_debug_action,
            _default_exception_debug_action,
        )
    else:
        self.debug = False
    return self

1864 

@property
def default_name(self) -> str:
    """Auto-generated name, built lazily by ``_generateDefaultName`` and cached in ``_defaultName``."""
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1870 

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Abstract hook: subclasses implement this to produce the string that
    ``default_name`` returns (and caches).
    """

1876 

def set_name(self, name: str) -> "ParserElement":
    """
    Assign a custom name to this expression; makes debugging and exception
    messages clearer. The error message is rebuilt from the new name, and
    debugging is switched on when ``__diag__.enable_debug_on_named_expressions``
    is set. Returns ``self`` so that calls can be chained.

    Example::

        Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
        Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # self.name resolves through the custom name just stored
    self.errmsg = "Expected " + self.name
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.set_debug()
    return self

1891 

@property
def name(self) -> str:
    """Custom name when one has been set (not ``None``), otherwise the auto-generated ``default_name``."""
    custom = self.customName
    if custom is not None:
        return custom
    return self.default_name

1896 

def __str__(self) -> str:
    """Return the expression's ``name`` (custom if set, else auto-generated)."""
    display_name = self.name
    return display_name

1899 

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text

1902 

def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and drop the cached default name so
    that it is regenerated on next access. Returns ``self``.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1907 

def recurse(self) -> List["ParserElement"]:
    """Return this expression's sub-expressions; the base implementation has none."""
    return list()

1910 

def _checkRecursion(self, parseElementList):
    """
    Walk the sub-expressions returned by ``recurse()``, passing each one a
    copy of ``parseElementList`` extended with this expression.
    """
    visited = parseElementList[:] + [self]
    for child in self.recurse():
        child._checkRecursion(visited)

1915 

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: emits a ``DeprecationWarning`` and then runs ``_checkRecursion``
    starting from an empty list. ``validateTrace`` is accepted but not used here.
    """
    deprecation_message = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

1926 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    ``parse_all`` / ``parseAll`` are synonyms; the parse is strict if either is true.
    ``encoding`` is only used when the file has to be opened here.
    """
    parseAll = parseAll or parse_all
    try:
        # first treat the argument as an open file-like object
        file_or_filename = typing.cast(TextIO, file_or_filename)
        file_contents = file_or_filename.read()
    except AttributeError:
        # no usable .read(): treat the argument as a path and open it ourselves
        file_or_filename = typing.cast(str, file_or_filename)
        with open(file_or_filename, "r", encoding=encoding) as source:
            file_contents = source.read()
    try:
        return self.parse_string(file_contents, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1956 

def __eq__(self, other):
    """
    Compare against another object.

    - the same object: equal
    - a string (``str_type``): equal when this expression fully matches it
    - another :class:`ParserElement`: equal when both have equal ``vars()``
    - anything else: not equal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

1965 

def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity

1968 

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
      (converted with ``str()`` before parsing)
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests;
      ``parseAll`` is a synonym, and the parse is strict only if both are true

    Returns ``True`` if parsing succeeds, ``False`` if a ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    strict = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=strict)
    except ParseBaseException:
        return False
    return True

1992 

def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments are synonyms for the snake_case ones.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` pairs, where ``result`` is the ``ParseResults`` of the
    test or the exception that it raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each snake_case argument with its camelCase synonym
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # a multiline string is split into one stripped test per line
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
    comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    comments: List[str] = []
    success = True
    # matches a literal backslash-n marker outside of quoted strings
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    for t in tests:
        # comment lines (and blank lines following a comment) are buffered and
        # emitted ahead of the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            "\n" + "\n".join(comments) if comments else "",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            out.append(pe.explain())
            out.append("FAIL: " + str(pe))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # a failure only counts as success when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    # a failing callback is reported but does not fail the test
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2191 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the optional diagram support cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as import_error:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from import_error

    self.streamline()

    diagram = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )
    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(diagram, embed=embed, **kwargs))
        return

    # a path was given: create/overwrite the file, then render into it
    with open(output_html, "w", encoding="utf-8") as html_file:
        html_file.write(railroad_to_html(diagram, embed=embed, **kwargs))

2246 

# Compatibility synonyms
# Each camelCase name below is a body-less stub decorated with
# ``replaced_by_pep8(<snake_case method>)``; ``replaced_by_pep8`` is imported
# from ``.util`` and presumably supplies the real implementation by forwarding
# to the named PEP 8 method (NOTE(review): confirm against ``util.py``).
# The decorator arguments refer to names defined earlier in the class body,
# so this section has to stay after those definitions.
# fmt: off
@staticmethod
@replaced_by_pep8(inline_literals_using)
def inlineLiteralsUsing(): ...

@staticmethod
@replaced_by_pep8(set_default_whitespace_chars)
def setDefaultWhitespaceChars(): ...

@replaced_by_pep8(set_results_name)
def setResultsName(self): ...

@replaced_by_pep8(set_break)
def setBreak(self): ...

@replaced_by_pep8(set_parse_action)
def setParseAction(self): ...

@replaced_by_pep8(add_parse_action)
def addParseAction(self): ...

@replaced_by_pep8(add_condition)
def addCondition(self): ...

@replaced_by_pep8(set_fail_action)
def setFailAction(self): ...

@replaced_by_pep8(try_parse)
def tryParse(self): ...

@staticmethod
@replaced_by_pep8(enable_left_recursion)
def enableLeftRecursion(): ...

@staticmethod
@replaced_by_pep8(enable_packrat)
def enablePackrat(): ...

@replaced_by_pep8(parse_string)
def parseString(self): ...

@replaced_by_pep8(scan_string)
def scanString(self): ...

@replaced_by_pep8(transform_string)
def transformString(self): ...

@replaced_by_pep8(search_string)
def searchString(self): ...

@replaced_by_pep8(ignore_whitespace)
def ignoreWhitespace(self): ...

@replaced_by_pep8(leave_whitespace)
def leaveWhitespace(self): ...

@replaced_by_pep8(set_whitespace_chars)
def setWhitespaceChars(self): ...

@replaced_by_pep8(parse_with_tabs)
def parseWithTabs(self): ...

@replaced_by_pep8(set_debug_actions)
def setDebugActions(self): ...

@replaced_by_pep8(set_debug)
def setDebug(self): ...

@replaced_by_pep8(set_name)
def setName(self): ...

@replaced_by_pep8(parse_file)
def parseFile(self): ...

@replaced_by_pep8(run_tests)
def runTests(self): ...

# Plain aliases: these camelCase names are bound directly to the same
# objects as their snake_case counterparts.
canParseNext = can_parse_next
resetCache = reset_cache
defaultName = default_name
# fmt: on

2329 

2330 

class _PendingSkip(ParserElement):
    # Internal placeholder that holds the spot where '...' was added to a parser
    # element; once another ParserElement is added to it, the placeholder is
    # replaced with a SkipTo targeting that element.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.must_skip = must_skip
        self.anchor = expr

    def _generateDefaultName(self) -> str:
        # render "<anchor> Empty" and show the Empty placeholder as "..."
        combined = self.anchor + Empty()
        return str(combined).replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the first
            # token and the results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor was absent and nothing was skipped: record what is missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a pending skip must be completed with "+ <target>" before parsing
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2370 

2371 

class Token(ParserElement):
    """Abstract base for atomic matching patterns; a :class:`ParserElement`
    subclass initialized with ``savelist=False``.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name is simply the concrete class name
        cls = type(self)
        return cls.__name__

2382 

2383 

class NoMatch(Token):
    """
    A token that will never match: ``parseImpl`` always raises
    ``ParseException``.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2397 

2398 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: when Literal itself is instantiated, pick a
        # subclass with an optimized parseImpl (Empty for an empty string,
        # _SingleCharLiteral for one character). Subclasses keep their own type.
        target = cls
        if cls is Literal:
            text = matchString or match_string
            if not text:
                target = Empty
            elif len(text) == 1:
                target = _SingleCharLiteral
        return super().__new__(target)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        # self.name reads self.match, so errmsg is built after it is set
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character test before the full prefix comparison
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2450 

2451 

class Empty(Literal):
    """
    An empty token, will always match; consumes no input and returns no tokens.
    """

    def __init__(self, match_string="", *, matchString=""):
        # the arguments are accepted for signature compatibility but ignored
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, doActions=True):
        no_tokens = []
        return loc, no_tokens

2467 

2468 

class _SingleCharLiteral(Literal):
    # Literal variant for one-character strings: a single character comparison
    # replaces the startswith() check.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2474 

2475 

# Register Literal as ParserElement's ``_literalStringClass``. This has to be
# done here because Literal is only defined at this point in the module.
# NOTE(review): presumably this is the class used to wrap plain strings that
# appear in expressions, and what ``inline_literals_using`` overrides - confirm.
ParserElement._literalStringClass = Literal

2477 

2478 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # merge the snake_case arguments with their camelCase synonyms
        keyword_text = matchString or match_string
        ident = identChars or ident_chars
        if ident is None:
            ident = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = keyword_text
        self.matchLen = len(keyword_text)
        try:
            self.firstMatchChar = keyword_text[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        # self.name reads self.match, so errmsg is built after it is set
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done on upper-cased text
            self.caselessmatch = keyword_text.upper()
            ident = ident.upper()
        self.identChars = set(ident)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if self.caseless:
            if instring[loc:end].upper() != self.caselessmatch:
                # no match, just raise plain exception
                raise ParseException(instring, loc, self.errmsg, self)
            if loc != 0 and instring[loc - 1].upper() in self.identChars:
                # preceded by keyword char
                raise ParseException(
                    instring,
                    loc - 1,
                    self.errmsg
                    + ", keyword was immediately preceded by keyword character",
                    self,
                )
            if (
                loc < len(instring) - self.matchLen
                and instring[end].upper() in self.identChars
            ):
                # followed by keyword char
                raise ParseException(
                    instring,
                    end,
                    self.errmsg + ", was immediately followed by keyword character",
                    self,
                )
            return end, self.match

        starts_here = (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        )
        if not starts_here:
            # no match, just raise plain exception
            raise ParseException(instring, loc, self.errmsg, self)
        if loc != 0 and instring[loc - 1] in self.identChars:
            # preceded by keyword char
            raise ParseException(
                instring,
                loc - 1,
                self.errmsg
                + ", keyword was immediately preceded by keyword character",
                self,
            )
        if loc < len(instring) - self.matchLen and instring[end] in self.identChars:
            # followed by keyword char
            raise ParseException(
                instring,
                end,
                self.errmsg
                + ", keyword was immediately followed by keyword character",
                self,
            )
        return end, self.match

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    setDefaultKeywordChars = set_default_keyword_chars

2595 

2596 

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        defining_text = matchString or match_string
        # the parent stores the upper-cased form, which is what gets compared
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2622 

2623 

class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # merge the snake_case arguments with their camelCase synonyms, then
        # delegate to Keyword with caseless matching switched on
        keyword_text = matchString or match_string
        ident = identChars or ident_chars
        super().__init__(keyword_text, ident, caseless=True)

2647 

2648 

class CloseMatch(Token):
    """A variation on :class:`Literal` that accepts "close" matches: input
    text of the same length as the match string in which at most ``n``
    characters differ.

    Parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    A successful parse returns the matched input text plus these named
    results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    An empty ``mismatches`` list means the match was exact.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # An explicit (non-None) snake_case limit overrides the camelCase one.
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        """Return ``<ClassName>:<repr of match string>``."""
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with the match string, character by
        character, tolerating up to ``maxMismatches`` differences.

        Returns ``(new_loc, ParseResults)`` carrying the matched text and the
        ``original`` / ``mismatches`` named results; raises
        ``ParseException`` at ``loc`` when the input is too short or differs
        in too many positions.
        """
        target = self.match_string
        end = loc + len(target)
        if end > len(instring):
            # not enough input left to hold the whole match string
            raise ParseException(instring, loc, self.errmsg, self)

        ignore_case = self.caseless
        allowed = self.maxMismatches
        bad_positions = []
        last_index = 0
        for last_index, (src, mat) in enumerate(zip(instring[loc:end], target)):
            if ignore_case:
                src, mat = src.lower(), mat.lower()
            if src == mat:
                continue
            bad_positions.append(last_index)
            if len(bad_positions) > allowed:
                raise ParseException(instring, loc, self.errmsg, self)

        stop = loc + last_index + 1
        results = ParseResults([instring[loc:stop]])
        results["original"] = target
        results["mismatches"] = bad_positions
        return stop, results

2734 

2735 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0,
      meaning no maximum)
    - ``exact`` - exact number of characters to match (default=0,
      meaning not specified); when given it overrides ``min`` and ``max``
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        """Validate the arguments, record the character sets and length
        limits, and - when possible - precompile an equivalent regular
        expression that replaces the character-by-character ``parseImpl``.

        Raises ``ValueError`` if no initial characters are given, if
        ``min < 1``, or if both ``min`` and ``max`` are given with
        ``min > max``.
        """
        # camelCase keyword arguments are alternate spellings; a truthy
        # camelCase value takes precedence over the snake_case one
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no (remaining) body characters: body shares the initial set
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # exact overrides both bounds; the locals are updated too because
            # the regex construction below reads them
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # One character class repeated minLen..maxLen times.  The
                # bare "+" shortcut is only valid when the minimum is 1;
                # otherwise an unspecified max must still honour min via an
                # open-ended "{m,}" range.
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                elif self.minLen != self.maxLen:
                    upper = "" if self.maxLen == _MAX_INT else self.maxLen
                    repeat = f"{{{self.minLen},{upper}}}"
                else:
                    repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # A leading character followed by (min-1)..(max-1) body
                # characters.
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    # "*" is only valid when a single leading character
                    # already satisfies the minimum length
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    elif min != max:
                        # min >= 1 is guaranteed by the validation above
                        upper = max - 1 if max > 0 else ""
                        repeat = f"{{{min - 1},{upper}}}"
                    else:
                        repeat = f"{{{min - 1}}}"

                self.reString = (
                    f"{re_leading_fragment}" f"{re_body_fragment}" f"{repeat}"
                )

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]

    def _generateDefaultName(self) -> str:
        """Build the default name ``W:(<init>[, <body>])`` followed by a
        ``{min,max}`` style length suffix when the limits are not the
        defaults."""

        def charsAsStr(s):
            # abbreviate long character sets so names stay readable
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)
            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."
            else:
                return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single-character word: drop the "W:" prefix
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, doActions=True):
        """Character-by-character matcher, used when no regex could be
        built.  Returns ``(new_loc, matched_text)`` or raises
        ``ParseException``."""
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # the word continues past the permitted maximum length
            throwException = True
        elif self.asKeyword:
            # a keyword must not be adjacent to another body character
            if (
                start > 0
                and instring[start - 1] in bodychars
                or loc < instrlen
                and instring[loc] in bodychars
            ):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, doActions=True):
        """Regex-based matcher installed by ``__init__`` when the word could
        be expressed as a compiled regular expression."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

2989 

2990 

class Char(Word):
    """Convenience subclass equivalent to :class:`Word` ``(characters, exact=1)``,
    for matching exactly one character drawn from a string of allowed
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # Fold each camelCase spelling into its snake_case counterpart
        # (a truthy camelCase value wins) before delegating to Word.
        keyword_flag = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, as_keyword=keyword_flag, exclude_chars=excluded
        )

3011 

3012 

class Regex(Token):
    r"""Token that matches input text against a regular expression.

    The expression is given either as a string in the syntax of the stdlib
    Python `re module <https://docs.python.org/3/library/re.html>`_, or as an
    already-compiled pattern object.  Named groups in the expression
    (``(?P<name>...)``) are returned as named :class:`ParseResults`.

    To use a different RE engine (such as the ``regex`` module), compile the
    pattern with that engine and pass the compiled object to ``Regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """``pattern`` and ``flags`` are handed unchanged to
        ``re.compile()`` (lazily, on first use) when ``pattern`` is a string.
        See the Python `re module <https://docs.python.org/3/library/re.html>`_
        documentation for the accepted patterns and flags.

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        for anything that is neither a string nor a compiled-pattern-like
        object (one having ``pattern`` and ``match`` attributes).
        """
        super().__init__()
        group_list_mode = asGroupList or as_group_list
        match_mode = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")
            compiled = None
            source = pattern
        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            compiled = pattern
            source = pattern.pattern
        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self._re = compiled
        self.pattern = self.reString = source
        self.flags = flags

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asGroupList = group_list_mode
        self.asMatch = match_mode
        # Select the result shape by rebinding parseImpl on the instance;
        # as_match is applied last, so it wins if both modes are requested.
        if group_list_mode:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]
        if match_mode:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]

    @cached_property
    def re(self):
        """The compiled pattern: the object supplied by the caller, or the
        pattern string compiled with ``flags``.  Raises ``ValueError`` if the
        string is not a valid regular expression."""
        supplied = self._re
        if supplied:
            return supplied
        try:
            return re.compile(self.pattern, self.flags)
        except re.error:
            raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """True when the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        """Return ``Re:(<repr of pattern>)`` with doubled backslashes
        collapsed to single ones."""
        shown = repr(self.pattern).replace("\\\\", "\\")
        return "Re:({})".format(shown)

    def parseImpl(self, instring, loc, doActions=True):
        """Default matcher: return the whole matched text as a
        ``ParseResults``, with every named group added as a named result."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        tokens = ParseResults(found.group())
        for group_name, group_text in found.groupdict().items():
            tokens[group_name] = group_text
        return found.end(), tokens

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Matcher used for ``as_group_list=True``: return the tuple of the
        match's groups."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.groups()

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Matcher used for ``as_match=True``: return the match object
        itself."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found

    def sub(self, repl: str) -> ParserElement:
        r"""
        Attach a parse action that rewrites the parsed result as if by
        `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_,
        and return this :class:`Regex`.

        Raises ``TypeError`` when the expression was built with
        ``as_group_list=True``, or with ``as_match=True`` and a callable
        ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch:
            if callable(repl):
                raise TypeError(
                    "cannot use sub() with a callable with Regex(as_match=True)"
                )

            # the token is a match object: expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # the token is the matched text: substitute within it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3166 

3167 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    # maps the two-character escape text (backslash + letter) to the
    # whitespace character it stands for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """Normalize the arguments, then build and compile the regular
        expression that recognizes the whole quoted string.

        Raises ``ValueError`` if the opening or closing quote is empty after
        stripping whitespace, or if the assembled pattern fails to compile.
        """
        super().__init__()
        # Merge each camelCase spelling with its snake_case counterpart.
        # String options: a truthy camelCase value wins.  Boolean options
        # default to True, so either spelling set to False disables them.
        escChar = escChar or esc_char
        escQuote = escQuote or esc_quote
        unquoteResults = unquoteResults and unquote_results
        endQuoteChar = endQuoteChar or end_quote_char
        convertWhitespaceEscapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars - wont work anyway
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if endQuoteChar is None:
            endQuoteChar = quote_char
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quoteChar: str = quote_char
        self.quoteCharLen: int = len(quote_char)
        self.firstQuoteChar: str = quote_char[0]
        self.endQuoteChar: str = endQuoteChar
        self.endQuoteCharLen: int = len(endQuoteChar)
        self.escChar: str = escChar or ""
        self.escQuote: str = escQuote or ""
        self.unquoteResults: bool = unquoteResults
        self.convertWhitespaceEscapes: bool = convertWhitespaceEscapes
        self.multiline = multiline

        # inner_pattern accumulates the alternatives allowed between the
        # quotes; sep becomes "|" once the first alternative has been added
        sep = ""
        inner_pattern = ""

        if escQuote:
            # alternative: the escaped-quote sequence
            inner_pattern += rf"{sep}(?:{re.escape(escQuote)})"
            sep = "|"

        if escChar:
            # alternative: the escape character followed by any one character
            inner_pattern += rf"{sep}(?:{re.escape(escChar)}.)"
            sep = "|"
            self.escCharReplacePattern = re.escape(escChar) + "(.)"

        if len(self.endQuoteChar) > 1:
            # alternative for multi-character end quotes: any proper prefix
            # of the end quote that is NOT followed by the rest of it
            # (longest prefix tried first)
            inner_pattern += (
                f"{sep}(?:"
                + "|".join(
                    f"(?:{re.escape(self.endQuoteChar[:i])}(?!{re.escape(self.endQuoteChar[i:])}))"
                    for i in range(len(self.endQuoteChar) - 1, 0, -1)
                )
                + ")"
            )
            sep = "|"

        self.flags = re.RegexFlag(0)

        # final alternative: any single character other than the first
        # character of the end quote and the escape character; in
        # single-line mode newline and carriage return are excluded as well
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            inner_pattern += (
                rf"{sep}(?:[^{_escape_regex_range_chars(self.endQuoteChar[0])}"
                rf"{(_escape_regex_range_chars(escChar) if escChar is not None else '')}])"
            )
        else:
            inner_pattern += (
                rf"{sep}(?:[^{_escape_regex_range_chars(self.endQuoteChar[0])}\n\r"
                rf"{(_escape_regex_range_chars(escChar) if escChar is not None else '')}])"
            )

        # full pattern: opening quote, zero or more inner alternatives,
        # closing quote
        self.pattern = "".join(
            [
                re.escape(self.quoteChar),
                "(?:",
                inner_pattern,
                ")*",
                re.escape(self.endQuoteChar),
            ]
        )

        if self.unquoteResults:
            # scanner used by parseImpl to walk the unquoted text one token
            # at a time: [whitespace escape |] escape-char + char | other char
            if self.convertWhitespaceEscapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})|({re.escape(self.escChar)}.)|(\n|.)",
                    flags=self.flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.escChar)}.)|(\n|.)", flags=self.flags
                )

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        """Describe the expression by its quote character(s)."""
        if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type):
            return f"string enclosed in {self.quoteChar!r}"

        return f"quoted string, starting with {self.quoteChar} ending with {self.endQuoteChar}"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(new_loc, text)``; ``text`` is the raw match including the
        quotes unless ``unquoteResults`` is set, in which case the quotes are
        removed and escapes are resolved.  Raises ``ParseException`` when
        there is no match.
        """
        # cheap first-character test before running the full regex
        result = (
            instring[loc] == self.firstQuoteChar
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:
            # strip off quotes
            ret = ret[self.quoteCharLen : -self.endQuoteCharLen]

            if isinstance(ret, str_type):
                if self.convertWhitespaceEscapes:
                    # group 1: whitespace escape -> mapped whitespace char
                    # group 2: escape char + char -> keep only the char
                    # group 3: any other character, kept as-is
                    ret = "".join(
                        self.ws_map[match.group(1)]
                        if match.group(1)
                        else match.group(2)[-1]
                        if match.group(2)
                        else match.group(3)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    # group 1: escape char + char -> keep only the char
                    # group 2: any other character, kept as-is
                    ret = "".join(
                        match.group(1)[-1] if match.group(1) else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

3371 

3372 

class CharsNotIn(Token):
    """Token for matching runs of characters that are *not* in a given
    set.  Whitespace is part of the match unless it is listed in the
    exclusion set (see example).  Constructed from a string of disallowed
    characters plus optional ``min``, ``max`` and/or ``exact`` lengths.
    ``min`` defaults to 1 (values below 1 are rejected); ``max`` and
    ``exact`` default to 0, meaning no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        # whitespace may be part of the match, so it is not skipped
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Return ``!W:(<chars>)``; the characters are cut to 13 plus
        ``...`` when their collapsed range form is longer than 16."""
        collapsed = _collapse_string_to_ranges(self.notChars)
        shown = self.notChars
        if len(collapsed) > 16:
            shown = f"{shown[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until a disallowed character, the
        maximum length or the end of input is reached.  Returns
        ``(new_loc, matched_text)`` or raises ``ParseException``."""
        forbidden = self.notCharsSet
        if instring[loc] in forbidden:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] not in forbidden:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3451 

3452 

class White(Token):
    """Special matching class for matching whitespace.  Pyparsing grammars
    normally ignore whitespace; use this class where a whitespace
    structure is significant.  Construct it with a string of the
    whitespace characters to match (default ``" \\t\\r\\n"``).  Optional
    ``min``, ``max``, and ``exact`` arguments work as described for the
    :class:`Word` class.
    """

    # display names used when building the expression's default name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # Skippable whitespace for this element is every known whitespace
        # character that is NOT one of the characters being matched.
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        """Concatenate the display names of the matched characters."""
        names = [White.whiteStrs[c] for c in self.matchWhite]
        return "".join(names)

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters starting at
        ``loc``.  Returns ``(new_loc, matched_text)`` or raises
        ``ParseException``."""
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3528 

3529 

class PositionToken(Token):
    """Token subclass whose instances are flagged as possibly matching
    empty and as not raising ``IndexError``; base of the position-testing
    expressions such as :class:`LineStart` and :class:`StringStart`."""

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

3535 

3536 

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; handy when
    scraping tabular reports.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and whitespace until the target column
        is reached, a non-space character is met, or the input ends."""
        target = self.col
        if col(loc, instring) == target:
            return loc

        total = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (
            loc < total
            and instring[loc].isspace()
            and col(loc, instring) != target
        ):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column, or raise
        ``ParseException`` if ``loc`` is already past that column."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]

3566 

3567 

class LineStart(PositionToken):
    r"""Succeeds only when the current position is the first column of a
    line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # keep a copy of the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set() | self.whiteChars
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring: str, loc: int) -> int:
        """Skip non-newline whitespace and, when newline was originally a
        whitespace character, step over consecutive newlines as well."""
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc, [])`` at column 1; raise ``ParseException``
        anywhere else."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3613 

3614 

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # "\n" is what this token matches, so it must not be skipped as whitespace
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returned as a token) or the end of the string.

        Raises ParseException for any other character, or when ``loc`` is
        already beyond the end of the string.
        """
        end = len(instring)
        if loc < end:
            if instring[loc] == "\n":
                return loc + 1, "\n"
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            # end of input counts as end of line; no token is produced
            return loc + 1, []
        raise ParseException(instring, loc, self.errmsg, self)

3636 

3637 

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at the start of the text, else raise ParseException."""
        # a non-zero loc still qualifies when everything before it is what
        # preParse skips from position 0 (whitespace and ignorables)
        at_start = loc == 0 or loc == self.preParse(instring, 0)
        if not at_start:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3653 

3654 

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is at or beyond the end of ``instring``.

        Returns ``loc + 1`` when ``loc`` is exactly the end of the string and
        ``loc`` unchanged when it is already past the end.

        Raises ParseException when unparsed text remains at ``loc``.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        # loc >= end from here on; the previous trailing ``else: raise`` could
        # never execute because <, == and > already cover every case
        return (loc + 1 if loc == end else loc), []

3673 

3674 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    # NOTE: the docstring is raw so that ``\b`` is kept as backslash-b instead
    # of being turned into a backspace character.

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase keyword wins only when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at position 0, or where a word character
        follows a non-word character; otherwise raise ParseException."""
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3699 

3700 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """
    # NOTE: the docstring is raw so that ``\b`` is kept as backslash-b instead
    # of being turned into a backspace character.

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase keyword wins only when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        # whitespace after a word is what marks its end, so it is not skipped
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at or past the end of the string, or where a
        non-word character follows a word character.

        Raises ParseException otherwise, including at position 0 of a
        non-empty string, where no word can have ended yet.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                # guard first: instring[loc - 1] would otherwise wrap around
                # to the last character of the string when loc == 0
                loc == 0
                or instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3726 

3727 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Store ``exprs`` as a list in ``self.exprs``.

        ``exprs`` may be a single string, a single ParserElement, or an
        iterable of either; strings are wrapped with ``_literalStringClass``.
        """
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # any strings in the sequence are wrapped as literals
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable after all - treat it as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and reset the default name."""
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the originals passed in by the caller are untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so the originals passed in by the caller are untouched
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained expression.

        A :class:`Suppress` that is already in ``ignoreExprs`` is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        # propagate whatever the base class actually stored
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{self.__class__.__name__}:({str(self.exprs)})"

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions and flatten directly nested
        expressions of the same class."""
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable(candidate) -> bool:
            # nested expression of our own class with nothing attached to it
            # that would be lost by splicing its contents into ours
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if mergeable(other):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if mergeable(other):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = "Expected " + str(self)

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a DeprecationWarning, validates the contained
        expressions and runs the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are themselves copies."""
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, listAllMatches=False):
        # optionally warn when a contained expression already has a results name
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for e in self.exprs:
                if (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                ):
                    warnings.warn(
                        "{}: setting results name {!r} on {} expression "
                        "collides with {!r} on contained expression".format(
                            "warn_ungrouped_named_tokens_in_collection",
                            name,
                            type(self).__name__,
                            e.resultsName,
                        ),
                        stacklevel=3,
                    )

        return super()._setResultsName(name, listAllMatches)

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on

3899 

3900 

class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # marker element: once parseImpl passes it, later failures are raised
        # as ParseSyntaxException
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            # replace each ``...`` with a SkipTo targeting the expression after it
            expanded = []
            last_index = len(exprs) - 1
            for i, expr in enumerate(exprs):
                if expr is not Ellipsis:
                    expanded.append(expr)
                    continue
                if i >= last_index:
                    raise Exception(
                        "cannot construct And with sequence ending in ..."
                    )
                skipto_arg: ParserElement = typing.cast(
                    ParseExpression, (Empty() + exprs[i + 1])
                ).exprs[-1]
                expanded.append(SkipTo(skipto_arg)("_skipped*"))
            exprs[:] = expanded
        super().__init__(exprs, savelist)
        if self.exprs:
            first = self.exprs[0]
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take the whitespace handling from the leading expression
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace
        else:
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        def ends_with_pending_skip(candidate) -> bool:
            return bool(
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs:
            if any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
                deleted_expr_marker = NoMatch()
                for i, e in enumerate(self.exprs[:-1]):
                    if e is deleted_expr_marker:
                        continue
                    if ends_with_pending_skip(e):
                        # fold the following expression into the pending skip
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        self.exprs[i + 1] = deleted_expr_marker
                self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            visited = set()
            while id(cur) not in visited:
                visited.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # cur_ default binds the block now, not at call time
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                first_sub = next(iter(cur.recurse()), None)
                if first_sub is None:
                    break
                cur = typing.cast(ParserElement, first_sub)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        head, *tail = self.exprs
        loc, resultlist = head._parse(instring, loc, doActions, callPreParse=False)
        past_error_stop = False
        for e in tail:
            # exact type test: only the marker class itself counts
            if type(e) is And._ErrorStop:
                past_error_stop = True
                continue
            if not past_error_stop:
                loc, exprtokens = e._parse(instring, loc, doActions)
            else:
                # after an error stop, failures are reported as syntax exceptions
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "{" + inner + "}"

4062 

4063 

class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: find out which alternatives match, and how far
        for e in self.exprs:
            try:
                end_loc = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error supersedes any ordinary failure seen so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, e))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = candidates[0][1]
                return best_expr._parse(instring, loc, doActions)

            longest = -1, None
            for expected_end, expr1 in candidates:
                if expected_end <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_end, toks = expr1._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_end >= expected_end:
                        return actual_end, toks
                    # didn't match as much as before
                    elif actual_end > longest[0]:
                        longest = actual_end, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; ties broken by the longer element name
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any single error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Or([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " ^ ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        # optionally warn when an And alternative will contribute several tokens
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

4222 

4223 

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        furthest_loc = -1
        furthest_exc = None

        # the first alternative that parses wins; remember the failure that got furthest
        for e in self.exprs:
            try:
                return e._parse(instring, loc, doActions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        return "{" + " | ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        # optionally warn when an And alternative will contribute several tokens
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

4338 

4339 

class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        # the expression groups are built lazily on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, doActions=True):
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = {id(e.expr): e for e in self.exprs if isinstance(e, Opt)}
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        multis = self.multioptionals[:]
        matchOrder = []

        # trial passes: keep sweeping the remaining expressions until a whole
        # sweep matches nothing; only the order of matches is recorded here
        while True:
            attempt = pending_required + pending_optional + multis
            failed = []
            fatals = []
            for e in attempt:
                try:
                    scan_loc = e.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # map an Opt's inner expression back to the Opt itself
                    matchOrder.append(self.opt1map.get(id(e), e))
                    if e in pending_required:
                        pending_required.remove(e)
                    elif e in pending_optional:
                        pending_optional.remove(e)
            if len(failed) == len(attempt):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest location first; ties broken by the longer element name
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if pending_required:
            missing = ", ".join([str(e) for e in pending_required])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        # real pass: parse in the discovered order, starting again from loc
        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return "{" + " & ".join(str(e) for e in self.exprs) + "}"

4512 

4513 

4514class ParseElementEnhance(ParserElement): 

4515 """Abstract subclass of :class:`ParserElement`, for combining and 

4516 post-processing parsed tokens. 

4517 """ 

4518 

def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
    """Wrap ``expr`` (a string is first converted to a parser element) and
    copy its parsing flags onto this element."""
    super().__init__(savelist)
    if isinstance(expr, str_type):
        expr_str = typing.cast(str, expr)
        literal_cls = self._literalStringClass
        if issubclass(literal_cls, Token):
            expr = literal_cls(expr_str)  # type: ignore[call-arg]
        elif issubclass(type(self), literal_cls):
            # wrapping with our own class is avoided here; use a plain Literal
            expr = Literal(expr_str)
        else:
            expr = literal_cls(Literal(expr_str))  # type: ignore[assignment, call-arg]
    expr = typing.cast(ParserElement, expr)
    self.expr = expr
    if expr is None:
        return

    # mirror the wrapped expression's flags
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.set_whitespace_chars(
        expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
    )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)

4541 

def recurse(self) -> List[ParserElement]:
    """Return the wrapped expression in a one-element list, or an empty list if there is none."""
    if self.expr is None:
        return []
    return [self.expr]

4544 

def parseImpl(self, instring, loc, doActions=True):
    """Delegate parsing to the wrapped expression.

    Raises ParseException when no expression is defined; parse errors from
    the wrapped expression are re-raised with this element's ``errmsg``.
    """
    if self.expr is None:
        raise ParseException(instring, loc, "No expression defined", self)

    try:
        return self.expr._parse(instring, loc, doActions, callPreParse=False)
    except ParseBaseException as pbe:
        # report the failure in terms of this element
        pbe.msg = self.errmsg
        raise

4554 

4555 def leave_whitespace(self, recursive: bool = True) -> ParserElement: 

4556 super().leave_whitespace(recursive) 

4557 

4558 if recursive: 

4559 if self.expr is not None: 

4560 self.expr = self.expr.copy() 

4561 self.expr.leave_whitespace(recursive) 

4562 return self 

4563 

4564 def ignore_whitespace(self, recursive: bool = True) -> ParserElement: 

4565 super().ignore_whitespace(recursive) 

4566 

4567 if recursive: 

4568 if self.expr is not None: 

4569 self.expr = self.expr.copy() 

4570 self.expr.ignore_whitespace(recursive) 

4571 return self 

4572 

4573 def ignore(self, other) -> ParserElement: 

4574 if isinstance(other, Suppress): 

4575 if other not in self.ignoreExprs: 

4576 super().ignore(other) 

4577 if self.expr is not None: 

4578 self.expr.ignore(self.ignoreExprs[-1]) 

4579 else: 

4580 super().ignore(other) 

4581 if self.expr is not None: 

4582 self.expr.ignore(self.ignoreExprs[-1]) 

4583 return self 

4584 

4585 def streamline(self) -> ParserElement: 

4586 super().streamline() 

4587 if self.expr is not None: 

4588 self.expr.streamline() 

4589 return self 

4590 

4591 def _checkRecursion(self, parseElementList): 

4592 if self in parseElementList: 

4593 raise RecursiveGrammarException(parseElementList + [self]) 

4594 subRecCheckList = parseElementList[:] + [self] 

4595 if self.expr is not None: 

4596 self.expr._checkRecursion(subRecCheckList) 

4597 

4598 def validate(self, validateTrace=None) -> None: 

4599 warnings.warn( 

4600 "ParserElement.validate() is deprecated, and should not be used to check for left recursion", 

4601 DeprecationWarning, 

4602 stacklevel=2, 

4603 ) 

4604 if validateTrace is None: 

4605 validateTrace = [] 

4606 tmp = validateTrace[:] + [self] 

4607 if self.expr is not None: 

4608 self.expr.validate(tmp) 

4609 self._checkRecursion([]) 

4610 

4611 def _generateDefaultName(self) -> str: 

4612 return f"{self.__class__.__name__}:({str(self.expr)})" 

4613 

4614 # Compatibility synonyms 

4615 # fmt: off 

4616 @replaced_by_pep8(leave_whitespace) 

4617 def leaveWhitespace(self): ... 

4618 

4619 @replaced_by_pep8(ignore_whitespace) 

4620 def ignoreWhitespace(self): ... 

4621 # fmt: on 

4622 

4623 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Zero-width element that matches only at column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width element that matches only at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; parseImpl overwrites it on nested blocks
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        # skip leading whitespace with an Empty(); the resulting position fixes
        # the column that every following line of this block must share
        anchor_loc = Empty().preParse(instring, loc)

        # if self.expr does not match here this raises, and nothing more is needed
        self.expr.try_parse(instring, anchor_loc, do_actions=doActions)

        indent_col = col(anchor_loc, instring)

        inner_expr = Empty() + self._Indent(indent_col) + self.expr
        if self._recursive:
            # optionally follow each line with a more deeply indented block
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(self._IndentGreater(indent_col) + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        # the block ends by returning to the parent's column, or at end of input
        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, doActions
        )

4686 

4687 

class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        # only location 0 counts as the start of the string
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")

4707 

4708 

class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        # column 1 is the first column of a line
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")

4740 

4741 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # parse with the contained expression, then empty the token list of the
        # returned ParseResults in place; named results defined inside the
        # lookahead expression are kept
        _end, lookahead = self.expr._parse(instring, loc, doActions=doActions)
        del lookahead[:]

        # report the original location: a lookahead consumes nothing
        return loc, lookahead

4776 

4777 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match; parsing without it
    raises ``TypeError``.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        # use a private copy of the expression that does not skip whitespace
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.exact = False
        # for expressions of known length the lookbehind distance is derived
        # from the expression, overriding any retreat argument
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # parse action that empties the token list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        if self.exact:
            # known distance: step back exactly that far and parse there
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            if self.retreat is None:
                # previously this surfaced as an opaque "int - NoneType"
                # TypeError from the slice arithmetic below
                raise TypeError(
                    "PrecededBy requires the 'retreat' argument when the"
                    " lookbehind expression does not have a fixed length"
                )
            # retreat specified a maximum lookbehind window, iterate
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat) : loc]
            last_expr = ParseException(instring, loc, self.errmsg)
            # try successively longer tails of the window; each must match
            # all the way up to the current location (StringEnd of the slice)
            for offset in range(1, min(loc, self.retreat + 1) + 1):
                try:
                    _, ret = test_expr._parse(
                        instring_slice, len(instring_slice) - offset
                    )
                except ParseBaseException as pbe:
                    last_expr = pbe
                else:
                    break
            else:
                # no tail matched: re-raise the most recent failure
                raise last_expr
        return loc, ret

4858 

4859 

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        begin = loc
        end, matched = self.expr._parse(instring, begin, doActions, callPreParse=False)
        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end
        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]

4900 

4901 

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        unwanted_matches = self.expr.can_parse_next(
            instring, loc, do_actions=doActions
        )
        if unwanted_matches:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"

4943 

4944 

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition of an expression, with an
    optional terminating sentinel (``stop_on``).
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        self.saveAsList = True
        # the stopOn() method converts a string sentinel to a parser element
        self.stopOn(stopOn or stop_on)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that ends the repetition."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        # try_parse on the negated sentinel raises when the sentinel is next
        try_not_ender = (
            self.not_ender.try_parse if self.not_ender is not None else None
        )

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if try_not_ender is not None:
            try_not_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions)

        # collect further matches until the expression (or the sentinel check) fails
        try:
            has_ignore_exprs = bool(self.ignoreExprs)
            while True:
                if try_not_ender is not None:
                    try_not_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignore_exprs else loc
                loc, more_tokens = parse_expr(instring, preloc, doActions)
                tokens += more_tokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        warning_enabled = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        )
        if warning_enabled:
            # warn for each contained expression that carries its own results name
            for e in [self.expr] + self.expr.recurse():
                if not isinstance(e, ParserElement):
                    continue
                if not e.resultsName:
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in e.suppress_warnings_
                ):
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        e.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

5020 

5021 

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        inner_name = str(self.expr)
        return "{" + inner_name + "}..."

5052 

5053 

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        try:
            result = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            # zero matches: succeed with an empty result at the same location
            result = loc, ParseResults([], name=self.resultsName)
        return result

    def _generateDefaultName(self) -> str:
        inner_name = str(self.expr)
        return "[" + inner_name + "]..."

5086 

5087 

class DelimitedList(ParseElementEnhance):
    """Expression matching a delimiter-separated list of a content expression."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        ``min`` and ``max`` bound the number of list elements; ``min``
        defaults to 1. ``ValueError`` is raised if ``min`` is less than 1 or
        ``max`` is less than ``min``.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")
        # compare against the effective minimum (1 when min is omitted), so a
        # max of 0 or less is rejected here rather than reaching the
        # repetition arithmetic below as a negative upper bound
        if max is not None and max < (min or 1):
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.delim = delim
        self.combine = combine
        if not combine:
            # delimiters are dropped from the results unless combining
            self.delim = Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one content element, then (delim + content) repeated; the first
        # element is already counted, hence the "- 1" on both bounds
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        return "{0} [{1} {0}]...".format(self.content.streamline(), self.raw_delim)

5151 

5152 

5153class _NullToken: 

5154 def __bool__(self): 

5155 return False 

5156 

5157 def __str__(self): 

5158 return "" 

5159 

5160 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: fall back to the default value, if one was given
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif inner.resultsName:
                # expose the default under the contained expression's results name too
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "[" + inner + "]"

5233 

5234 

# ``Optional`` is a second name bound to the same class as ``Opt``
Optional = Opt

5236 

5237 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        # ``failOn`` is the keyword-only spelling; it wins over ``fail_on`` when truthy
        failOn = failOn or fail_on
        if ignore is not None:
            self.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        # a string fail-on value is converted to a parser element
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` for the target expression.

        Returns the end location and a ``ParseResults`` holding the skipped
        text (followed by the target's tokens when ``include`` was set).
        Raises ``ParseException`` if the scan runs past the end of ``instring``.
        """
        startloc = loc
        instrlen = len(instring)
        # bind the callables used inside the scan loop to local names
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        self_preParse = self.preParse if self.callPreparse else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while's else clause, so
                # no exception is raised here; the text up to tmploc is returned
                # below (and, with include set, the target is then parsed at
                # tmploc) - confirm this matches the documented fail_on behavior
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_preParse is not None:
                # skip grammar-ignored expressions
                tmploc = self_preParse(instring, tmploc)

            try:
                # probe only: parse actions are disabled for this trial match
                self_expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real (honoring doActions) and append its tokens
            loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5366 

5367 

5368class Forward(ParseElementEnhance): 

5369 """ 

5370 Forward declaration of an expression to be defined later - 

5371 used for recursive grammars, such as algebraic infix notation. 

5372 When the expression is known, it is assigned to the ``Forward`` 

5373 variable using the ``'<<'`` operator. 

5374 

5375 Note: take care when assigning to ``Forward`` not to overlook 

5376 precedence of operators. 

5377 

5378 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that:: 

5379 

5380 fwd_expr << a | b | c 

5381 

5382 will actually be evaluated as:: 

5383 

5384 (fwd_expr << a) | b | c 

5385 

5386 thereby leaving b and c out as parseable alternatives. It is recommended that you 

5387 explicitly group the values inserted into the ``Forward``:: 

5388 

5389 fwd_expr << (a | b | c) 

5390 

5391 Converting to use the ``'<<='`` operator instead will avoid this problem. 

5392 

5393 See :class:`ParseResults.pprint` for an example of a recursive 

5394 parser created using ``Forward``. 

5395 """ 

5396 

def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
    """Create the forward declaration, optionally with its expression already given."""
    # record a frame summary taken at construction time; __del__ uses its
    # filename and lineno when warning about a Forward that never got an expression
    self.caller_frame = traceback.extract_stack(limit=2)[0]
    super().__init__(other, savelist=False)  # type: ignore[arg-type]
    # set by __lshift__ and compared in __or__
    self.lshift_line = None

5401 

def __lshift__(self, other) -> "Forward":
    """Attach ``other`` as this Forward's expression (``fwd << expr``).

    A string is converted to a parser element first; any other
    non-``ParserElement`` operand yields ``NotImplemented``. Returns ``self``.
    """
    # drop the construction-site frame summary if it is still present
    if hasattr(self, "caller_frame"):
        del self.caller_frame
    if isinstance(other, str_type):
        other = self._literalStringClass(other)

    if not isinstance(other, ParserElement):
        return NotImplemented

    self.expr = other
    # take over the parsing attributes of the newly attached expression
    self.streamlined = other.streamlined
    self.mayIndexError = self.expr.mayIndexError
    self.mayReturnEmpty = self.expr.mayReturnEmpty
    self.set_whitespace_chars(
        self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
    )
    self.skipWhitespace = self.expr.skipWhitespace
    self.saveAsList = self.expr.saveAsList
    self.ignoreExprs.extend(self.expr.ignoreExprs)
    # remember the calling line; __or__ compares against it to detect
    # '<<' and '|' being used together on one line
    self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
    return self

5423 

def __ilshift__(self, other) -> "Forward":
    """In-place form (``fwd <<= expr``): delegates to ``<<`` for parser elements."""
    if isinstance(other, ParserElement):
        return self << other
    return NotImplemented

5429 

def __or__(self, other) -> "ParserElement":
    """Combine with ``other`` using ``|``, warning when it shares a line with ``<<``."""
    caller_line = traceback.extract_stack(limit=2)[-2]
    # same calling line as the last '<<' means "fwd << a | b" was written,
    # which evaluates as "(fwd << a) | b"
    if (
        __diag__.warn_on_match_first_with_lshift_operator
        and caller_line == self.lshift_line
        and Diagnostics.warn_on_match_first_with_lshift_operator
        not in self.suppress_warnings_
    ):
        warnings.warn(
            "using '<<' operator with '|' is probably an error, use '<<='",
            stacklevel=2,
        )
    ret = super().__or__(other)
    return ret

5444 

def __del__(self):
    """Warn, at the construction site, when a Forward is dropped without an expression."""
    # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
    if self.expr is not None:
        return
    if not __diag__.warn_on_assignment_to_Forward:
        return
    if Diagnostics.warn_on_assignment_to_Forward in self.suppress_warnings_:
        return

    created_at = self.caller_frame
    warnings.warn_explicit(
        "Forward defined here but no expression attached later using '<<=' or '<<'",
        UserWarning,
        filename=created_at.filename,
        lineno=created_at.lineno,
    )

5458 

def parseImpl(self, instring, loc, doActions=True):
    """Parse ``instring`` at ``loc`` through the attached expression.

    Warns first if no expression has been attached (subject to the
    ``warn_on_parse_using_empty_Forward`` diagnostic).  When left recursion
    is not enabled on ``ParserElement`` the call is simply delegated to the
    parent implementation; otherwise the bounded-recursion algorithm
    described below is run under ``ParserElement.recursion_lock``.

    Returns a ``(location, results)`` pair.  A ``ParseException`` from the
    wrapped expression propagates when no earlier expansion matched, and an
    exception stored in the memo for this position is re-raised.
    """
    if (
        self.expr is None
        and __diag__.warn_on_parse_using_empty_Forward
        and Diagnostics.warn_on_parse_using_empty_Forward
        not in self.suppress_warnings_
    ):
        # walk stack until parse_string, scan_string, search_string, or transform_string is found
        parse_fns = (
            "parse_string",
            "scan_string",
            "search_string",
            "transform_string",
        )
        tb = traceback.extract_stack(limit=200)
        # innermost frame first; ``i`` counts frames starting at 1 for this one
        for i, frm in enumerate(reversed(tb), start=1):
            if frm.name in parse_fns:
                stacklevel = i + 1
                break
        else:
            # no public parse entry point found in the inspected frames
            stacklevel = 2
        warnings.warn(
            "Forward expression was never assigned a value, will not parse any input",
            stacklevel=stacklevel,
        )
    if not ParserElement._left_recursion_enabled:
        return super().parseImpl(instring, loc, doActions)
    # ## Bounded Recursion algorithm ##
    # Recursion only needs to be processed at ``Forward`` elements, since they are
    # the only ones that can actually refer to themselves. The general idea is
    # to handle recursion stepwise: We start at no recursion, then recurse once,
    # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
    #
    # The "trick" here is that each ``Forward`` gets evaluated in two contexts
    # - to *match* a specific recursion level, and
    # - to *search* the bounded recursion level
    # and the two run concurrently. The *search* must *match* each recursion level
    # to find the best possible match. This is handled by a memo table, which
    # provides the previous match to the next level match attempt.
    #
    # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
    #
    # There is a complication since we not only *parse* but also *transform* via
    # actions: We do not want to run the actions too often while expanding. Thus,
    # we expand using `doActions=False` and only run `doActions=True` if the next
    # recursion level is acceptable.
    with ParserElement.recursion_lock:
        memo = ParserElement.recursion_memos
        try:
            # we are parsing at a specific recursion expansion - use it as-is
            prev_loc, prev_result = memo[loc, self, doActions]
            # a memoized failure is stored as the exception instance itself
            if isinstance(prev_result, Exception):
                raise prev_result
            return prev_loc, prev_result.copy()
        except KeyError:
            # no memo entry yet: this call drives the search at ``loc``
            act_key = (loc, self, True)
            peek_key = (loc, self, False)
            # we are searching for the best recursion expansion - keep on improving
            # both `doActions` cases must be tracked separately here!
            # seed the memo with a failure located before ``loc`` so that any
            # real match compares as an improvement
            prev_loc, prev_peek = memo[peek_key] = (
                loc - 1,
                ParseException(
                    instring, loc, "Forward recursion without base case", self
                ),
            )
            if doActions:
                memo[act_key] = memo[peek_key]
            while True:
                try:
                    # expand one more level without running parse actions
                    new_loc, new_peek = super().parseImpl(instring, loc, False)
                except ParseException:
                    # we failed before getting any match – do not hide the error
                    if isinstance(prev_peek, Exception):
                        raise
                    # otherwise fall back to the previous (successful) level
                    new_loc, new_peek = prev_loc, prev_peek
                # the match did not get better: we are done
                if new_loc <= prev_loc:
                    if doActions:
                        # replace the match for doActions=False as well,
                        # in case the action did backtrack
                        prev_loc, prev_result = memo[peek_key] = memo[act_key]
                        # the search is over: drop both memo entries for this position
                        del memo[peek_key], memo[act_key]
                        return prev_loc, prev_result.copy()
                    del memo[peek_key]
                    return prev_loc, prev_peek.copy()
                # the match did get better: see if we can improve further
                else:
                    if doActions:
                        try:
                            # re-run the accepted level with parse actions enabled
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            # record the failure for both keys before propagating
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

5553 

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """Turn off whitespace skipping on this element and return ``self``.

    ``recursive`` is accepted for signature compatibility; this
    implementation does not read it.
    """
    self.skipWhitespace = False
    return self

5557 

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """Turn on whitespace skipping on this element and return ``self``.

    ``recursive`` is accepted for signature compatibility; this
    implementation does not read it.
    """
    self.skipWhitespace = True
    return self

5561 

def streamline(self) -> ParserElement:
    """Streamline the attached expression once and return ``self``."""
    if self.streamlined:
        return self

    # Set the flag before descending so that an expression which refers
    # back to this element finds it already marked.
    self.streamlined = True
    inner = self.expr
    if inner is not None:
        inner.streamline()
    return self

5568 

def validate(self, validateTrace=None) -> None:
    """Deprecated recursion check; always emits a ``DeprecationWarning``.

    ``validateTrace`` is the list of elements already visited.  If this
    element is not in it, the attached expression (when present) is
    validated with this element appended to a copy of the trace.
    ``self._checkRecursion([])`` is called in every case.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        DeprecationWarning,
        stacklevel=2,
    )
    trace = [] if validateTrace is None else validateTrace

    if self not in trace:
        extended_trace = trace[:] + [self]
        inner = self.expr
        if inner is not None:
            inner.validate(extended_trace)
    self._checkRecursion([])

5583 

def _generateDefaultName(self) -> str:
    """Build the default display name, ``"<ClassName>: <expression>"``.

    The expression part is ``str(self.expr)`` truncated to 1000 characters,
    ``"None"`` when no expression is attached, or ``"..."`` when rendering
    the expression raises an ordinary exception.
    """
    # Avoid infinite recursion by setting a temporary _defaultName
    self._defaultName = ": ..."

    # Use the string representation of main expression.
    retString = "..."
    try:
        if self.expr is not None:
            retString = str(self.expr)[:1000]
        else:
            retString = "None"
    except Exception:
        # Best effort: keep the "..." placeholder.  Unlike a ``return``
        # inside ``finally``, this lets KeyboardInterrupt / SystemExit
        # propagate instead of silently discarding them.
        pass
    return self.__class__.__name__ + ": " + retString

5597 

def copy(self) -> ParserElement:
    """Return a copy of this element.

    With an expression attached, the parent class's ``copy`` is used.
    Otherwise a new ``Forward`` is created and this (still empty) element
    is attached to it with ``<<=``.
    """
    if self.expr is None:
        placeholder = Forward()
        placeholder <<= self
        return placeholder
    return super().copy()

5605 

def _setResultsName(self, name, list_all_matches=False):
    """Set a results name, warning first if no expression is attached yet.

    The warning is governed by the ``warn_name_set_on_empty_Forward``
    diagnostic and this element's ``suppress_warnings_``.  The actual work
    is delegated to the parent class, whose return value is returned.
    """
    warning_wanted = (
        __diag__.warn_name_set_on_empty_Forward
        and Diagnostics.warn_name_set_on_empty_Forward
        not in self.suppress_warnings_
    )
    if warning_wanted and self.expr is None:
        warnings.warn(
            "{}: setting results name {!r} on {} expression "
            "that has no contained expression".format(
                "warn_name_set_on_empty_Forward", name, type(self).__name__
            ),
            stacklevel=3,
        )

    return super()._setResultsName(name, list_all_matches)

5622 

# Compatibility synonyms
# camelCase names kept alongside the snake_case methods defined above.
# The bodies are placeholders (``...``); each stub is passed through
# ``replaced_by_pep8`` together with the snake_case method it stands for.
# NOTE(review): the exact behavior of ``replaced_by_pep8`` lives in
# ``.util`` and is not visible here.
# fmt: off
@replaced_by_pep8(leave_whitespace)
def leaveWhitespace(self): ...

@replaced_by_pep8(ignore_whitespace)
def ignoreWhitespace(self): ...
# fmt: on

5631 

5632 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but not forwarded to the parent class.
        super().__init__(expr)
        # Converters start out not saving results as a list; subclasses
        # that need it switch the flag back on.
        self.saveAsList = False

5641 

5642 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # The legacy camelCase keyword wins when it is given explicitly.
        separator = join_string if joinString is None else joinString
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = separator
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression and return ``self``.

        In adjacent mode the base ``ParserElement.ignore`` is called
        directly; otherwise the inherited implementation is used.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their single joined string."""
        # Start from a copy so attributes of the results are retained, then
        # swap its list contents for the one combined token.
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

5698 

5699 

class Group(TokenConverter):
    """Converter that returns the matched tokens nested in a list - useful
    for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    When the optional ``aslist`` argument is True, the parsed tokens are
    returned as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in one enclosing list."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_tokens = tokenlist.asList()
        else:
            plain_tokens = list(tokenlist)
        return ParseResults.List(plain_tokens)

5735 

5736 

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and also
    as a dictionary. The first token of each element is used as the key
    under which that element can be looked up. Useful for tabular report
    scraping when the first column can serve as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Assign each non-empty element to a key taken from its first token.

        Raises ``TypeError`` when an element with more than two items (or
        with a nested results second item) cannot be copied.
        """
        for position, element in enumerate(tokenlist):
            if len(element) == 0:
                continue

            key = element[0]
            if isinstance(key, int):
                key = str(key).strip()

            if len(element) == 1:
                # key only, no value
                value = ""
            elif len(element) == 2 and not isinstance(element[1], ParseResults):
                # simple key/value pair
                value = element[1]
            else:
                try:
                    remainder = element.copy()  # ParseResults(i)
                except Exception:
                    exc = TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    )
                    raise exc from None

                # drop the key; what is left is the value
                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        if self._asPythonDict:
            return [tokenlist.as_dict()] if self.resultsName else tokenlist.as_dict()
        return [tokenlist] if self.resultsName else tokenlist

5822 

5823 

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` is a placeholder that is resolved when the next
        # expression is added or subtracted; ``savelist`` is not used here.
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> "ParserElement":
        """Implement ``self + other``, resolving a pending ``...`` skip."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        """Implement ``self - other``, resolving a pending ``...`` skip."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Return ``self``; the element is already suppressing."""
        return self

5877 

5878 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each call of the decorated parse action writes
    ``">>entering <name>(line: <source line>, <parse location>, <matched tokens>)"``
    to ``sys.stderr``.  When the action finishes, ``"<<leaving <name>"`` is
    written together with either the returned value or the exception that
    was raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            label = paArgs[0].__class__.__name__ + "." + label
        emit = sys.stderr.write
        emit(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            emit(f"<<leaving {label} (exception: {exc})\n")
            raise
        emit(f"<<leaving {label} (ret: {outcome!r})\n")
        return outcome

    z.__name__ = f.__name__
    return z

5922 

5923 

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Grammar used by srange() to read a regex-style ``[...]`` character set.

# a backslash followed by one of the listed punctuation characters;
# yields the character without its backslash
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# ``\x##`` / ``\X##`` / ``\0x##`` hex escape; yields the character with that
# code point.  The prefix is removed positionally (backslash, optional "0",
# then the x/X marker): ``str.lstrip(r"\0x")`` strips a *set* of characters,
# which left the "X" of ``\X41`` in place and emptied ``\x00``, making
# ``int()`` raise for input the regex accepts.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:].lstrip("0")[1:], 16))
)
# ``\0##`` octal escape; yields the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one member of the set: an escape, or any single character except ``\`` and ``]``
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range, grouped as [first, last] with the dash dropped
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# complete bracket expression: ``[``, optional ``^`` ("negate"), members ("body"), ``]``
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

5950 

5951 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. An empty
    string is returned when the input cannot be processed. The values
    enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def _expanded(p):
        # A grouped range arrives as ParseResults [first, last]; anything
        # else is already a single character.
        if isinstance(p, ParseResults):
            first, last = ord(p[0]), ord(p[1])
            return "".join(chr(c) for c in range(first, last + 1))
        return p

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        return ""

5987 

5988 

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. Any additional args are
    forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # Fall back to the class name for callables that carry no ``__name__``.
    fallback_name = func.__class__.__name__
    func_name = getattr(func, "__name__", fallback_name)

    def pa(s, l, t):
        converted = []
        for token in t:
            converted.append(func(token, *args))
        return converted

    pa.__name__ = func_name

    return pa

6033 

6034 

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every local variable of the calling frame that holds a
    :class:`ParserElement` without a custom name is named after
    that variable.
    """
    caller = sys._getframe().f_back
    if caller is None:
        return
    caller = typing.cast(types.FrameType, caller)
    unnamed = (
        (var_name, element)
        for var_name, element in caller.f_locals.items()
        if isinstance(element, ParserElement) and not element.customName
    )
    for var_name, element in unnamed:
        element.set_name(var_name)

6047 

6048 

# Double-quoted string.  The body regex accepts any character other than
# '"', newline, carriage return or backslash; a doubled '""'; or a backslash
# followed by either one non-'x' character or 'x' plus hex digits.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Single-quoted counterpart of the expression above.
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either of the two forms above, tried in order (double quotes first).
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style string literal: triple-quoted (double or single) or
# single-line quoted, combined with '^'.  In the single-line forms an
# embedded quote is written with a backslash (\" or \') rather than doubled.
# NOTE(review): the triple-quoted patterns pass re.MULTILINE, which only
# changes '^'/'$' matching; '\\.' will not match a backslash followed by a
# newline without re.DOTALL - confirm whether that is intended.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# A quoted string (copied from quoted_string) prefixed with the letter "u".
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# Character sets built with srange() from hex-escaped code-point ranges:
# 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff ...
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# ... and 0xa1-0xbf plus the two code points skipped above (0xd7, 0xf7).
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6086 

# build list of built-in expressions, for future reference if a global default value
# gets updated
# Collects every ParserElement instance bound in this namespace at this
# point, so expressions defined below this line are not included.
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6092 

# backward compatibility names
# camelCase aliases bound to the same objects as the snake_case names.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end

# camelCase function names: each placeholder stub is passed through
# ``replaced_by_pep8`` together with the snake_case function it stands for.
# NOTE(review): the exact behavior of ``replaced_by_pep8`` lives in
# ``.util`` and is not visible here.
@replaced_by_pep8(null_debug_action)
def nullDebugAction(): ...

@replaced_by_pep8(trace_parse_action)
def traceParseAction(): ...

@replaced_by_pep8(condition_as_parse_action)
def conditionAsParseAction(): ...

@replaced_by_pep8(token_map)
def tokenMap(): ...
# fmt: on

6115# fmt: on