Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2594 statements  

1# 

2# core.py 

3# 

4 

5from collections import deque 

6import os 

7import typing 

8from typing import ( 

9 Any, 

10 Callable, 

11 Generator, 

12 List, 

13 NamedTuple, 

14 Sequence, 

15 Set, 

16 TextIO, 

17 Tuple, 

18 Union, 

19 cast, 

20) 

21from abc import ABC, abstractmethod 

22from enum import Enum 

23import string 

24import copy 

25import warnings 

26import re 

27import sys 

28from collections.abc import Iterable 

29import traceback 

30import types 

31from operator import itemgetter 

32from functools import wraps 

33from threading import RLock 

34from pathlib import Path 

35 

36from .util import ( 

37 _FifoCache, 

38 _UnboundedCache, 

39 __config_flags, 

40 _collapse_string_to_ranges, 

41 _escape_regex_range_chars, 

42 _bslash, 

43 _flatten, 

44 LRUMemo as _LRUMemo, 

45 UnboundedMemo as _UnboundedMemo, 

46 replaced_by_pep8, 

47) 

48from .exceptions import * 

49from .actions import * 

50from .results import ParseResults, _ParseResultsWithOffset 

51from .unicode import pyparsing_unicode 

52 

53_MAX_INT = sys.maxsize 

54str_type: Tuple[type, ...] = (str, bytes) 

55 

56if sys.version_info >= (3, 7): 

57 _RePattern = re.Pattern 

58else: 

59 _RePattern = typing.Pattern 

60 

61# 

62# Copyright (c) 2003-2022 Paul T. McGuire 

63# 

64# Permission is hereby granted, free of charge, to any person obtaining 

65# a copy of this software and associated documentation files (the 

66# "Software"), to deal in the Software without restriction, including 

67# without limitation the rights to use, copy, modify, merge, publish, 

68# distribute, sublicense, and/or sell copies of the Software, and to 

69# permit persons to whom the Software is furnished to do so, subject to 

70# the following conditions: 

71# 

72# The above copyright notice and this permission notice shall be 

73# included in all copies or substantial portions of the Software. 

74# 

75# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

76# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

77# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

78# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

79# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

80# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

81# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

82# 

83 

84 

if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """Minimal stand-in for ``functools.cached_property`` on Python < 3.8.

        Wraps a single-argument method. The first attribute access on an
        instance calls the method and stores the result in the instance
        ``__dict__`` under the method's name, so later accesses find the
        stored value directly and never reach this descriptor again.
        """

        def __init__(self, func):
            self._func = func

        def __get__(self, instance, owner=None):
            # Accessed on the class itself (no instance): hand back the
            # descriptor, as functools.cached_property does, instead of
            # failing on ``None.__dict__``.
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret

96 

97 

class __compat__(__config_flags):
    """
    Compatibility switches for pyparsing features that are scheduled for a
    future release. Setting a switch to True lets an earlier version opt in to
    that feature, for compatibility development and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every non-underscore name defined in the class body up to this point
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]

119 

120 

class __diag__(__config_flags):
    """Holder of the global diagnostic flags; every flag starts out disabled."""

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # every non-underscore name defined in the class body up to this point
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    # flags partitioned by their name prefix
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable each flag whose name starts with ``warn``."""
        for warning_name in cls._warning_names:
            cls.enable(warning_name)

141 

142 

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - flag to enable warnings about combining the
      ``'<<'`` operator with a :class:`MatchFirst` expression (judging by the flag name; the code
      that emits this warning is outside this section - confirm the exact trigger there)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # member names must match the flag attributes of ``__diag__``:
    # enable_diag()/disable_diag() forward ``member.name`` to it
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

175 

176 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch on the global pyparsing diagnostic flag identified by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

182 

183 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch off the global pyparsing diagnostic flag identified by ``diag_enum``
    (a member of :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

189 

190 

def enable_all_warnings() -> None:
    """
    Switch on every global pyparsing diagnostic warning (see :class:`Diagnostics`)
    by delegating to ``__diag__.enable_all_warnings``.
    """
    diagnostics = __diag__
    diagnostics.enable_all_warnings()

196 

197 

# hide abstract class: the base has served its purpose once __compat__ and
# __diag__ have been defined, so remove its name from this module's namespace
del __config_flags

200 

201 

202def _should_enable_warnings( 

203 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] 

204) -> bool: 

205 enable = bool(warn_env_var) 

206 for warn_opt in cmd_line_warn_options: 

207 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( 

208 ":" 

209 )[:5] 

210 if not w_action.lower().startswith("i") and ( 

211 not (w_message or w_category or w_module) or w_module == "pyparsing" 

212 ): 

213 enable = True 

214 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): 

215 enable = False 

216 return enable 

217 

218 

# At import time, switch on every "warn*" diagnostic when asked to, either by
# the interpreter's warning options (sys.warnoptions) or by a non-empty
# PYPARSINGENABLEALLWARNINGS environment variable.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

223 

224 

225# build list of single arg builtins, that can be used as parse actions 

226# fmt: off 

227_single_arg_builtins = { 

228 sum, len, sorted, reversed, list, tuple, set, any, all, min, max 

229} 

230# fmt: on 

231 

_generatorType = types.GeneratorType
# (location, tokens) pair, as returned by ParserElement.parseImpl
ParseImplReturnType = Tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]
# a parse action may accept (), (toks), (loc, toks) or (s, loc, toks);
# _trim_arity adapts whichever form is given to the 3-argument call
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# same four call forms as ParseAction, but returning a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: called as fn(s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug hooks: called as (instring, loc, expr, cache_hit),
# (instring, startloc, endloc, expr, toks, cache_hit) and
# (instring, loc, expr, exc, cache_hit) respectively
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

253 

254 

# character-set constants exported for building grammars
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every printable character that is not whitespace
printables: str = "".join(
    ch for ch in string.printable if ch not in string.whitespace
)

# cache of the stack entry captured on the first call to _trim_arity
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]

264 

265 

def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that accepts the full parse-action argument list and
    forwards only the trailing arguments that ``func`` accepts.

    - If ``func`` is one of ``_single_arg_builtins``, the wrapper is a lambda
      that passes only the third argument (the tokens).
    - Otherwise the wrapper first calls ``func`` with all arguments. While the
      call raises a ``TypeError`` that originates at the call line itself (as
      opposed to inside ``func``), one more leading argument is dropped and
      the call is retried, up to ``max_limit`` dropped arguments. Once a call
      succeeds, the number of dropped arguments is remembered and later calls
      go straight through.

    The wrapper takes ``func``'s name and docstring.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments to drop, and whether that number is settled
    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    # look at no more than the first two traceback entries; the
                    # last of them is compared with the synthesized call line
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        # the call itself was rejected: drop one more leading
                        # argument and try again, unless the limit is reached
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

323 

324 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate (a function returning ``True`` or ``False``) so that it can
    be used wherever a parse action is expected. Useful where
    :class:`ParserElement.add_condition` is not available, such as when attaching
    a condition to an operator level in :class:`infix_notation`.

    Optional keyword arguments:

    - ``message`` - custom message for the raised exception; when omitted,
      "failed user-defined condition" is used
    - ``fatal`` - if True, a :class:`ParseFatalException` is raised to stop parsing
      immediately; otherwise a :class:`ParseException` is raised

    The returned parse action raises the chosen exception whenever the
    predicate returns a falsy value, and otherwise returns nothing.
    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message

    exc_type = ParseException
    if fatal:
        exc_type = ParseFatalException

    # accept predicates declared with anywhere from 0 to 3 parameters
    fn = _trim_arity(fn)

    @wraps(fn)
    def checked_condition(s, l, t):
        if fn(s, l, t):
            return
        raise exc_type(s, l, msg)

    return checked_condition

351 

352 

def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """Default "try" debug hook: print the expression about to be matched, its
    location (with line and column), the source line, and a caret line marking
    the column. The output is prefixed with ``*`` when ``cache_hit`` is true.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""

    header = f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
    source_line = f" {line(loc, instring)}\n"
    caret_line = f" {' ' * (col(loc, instring) - 1)}^"
    print(header + source_line + caret_line)

364 

365 

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """Default "match" debug hook: print the matched expression and its tokens
    as a list. The output is prefixed with ``*`` when ``cache_hit`` is true.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""
    matched_tokens = toks.as_list()
    print(f"{cache_hit_str}Matched {expr} -> {matched_tokens}")

376 

377 

378def _default_exception_debug_action( 

379 instring: str, 

380 loc: int, 

381 expr: "ParserElement", 

382 exc: Exception, 

383 cache_hit: bool = False, 

384): 

385 cache_hit_str = "*" if cache_hit else "" 

386 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}") 

387 

388 

def null_debug_action(*args):
    """Debug action that ignores its arguments and does nothing; use it to
    suppress debugging output during parsing."""
    return None

391 

392 

393class ParserElement(ABC): 

394 """Abstract base level parser element class.""" 

395 

396 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

397 verbose_stacktrace: bool = False 

398 _literalStringClass: type = None # type: ignore[assignment] 

399 

400 @staticmethod 

401 def set_default_whitespace_chars(chars: str) -> None: 

402 r""" 

403 Overrides the default whitespace chars 

404 

405 Example:: 

406 

407 # default whitespace chars are space, <TAB> and newline 

408 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] 

409 

410 # change to just treat newline as significant 

411 ParserElement.set_default_whitespace_chars(" \t") 

412 Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def'] 

413 """ 

414 ParserElement.DEFAULT_WHITE_CHARS = chars 

415 

416 # update whitespace all parse expressions defined in this module 

417 for expr in _builtin_exprs: 

418 if expr.copyDefaultWhiteChars: 

419 expr.whiteChars = set(chars) 

420 

421 @staticmethod 

422 def inline_literals_using(cls: type) -> None: 

423 """ 

424 Set class to be used for inclusion of string literals into a parser. 

425 

426 Example:: 

427 

428 # default literal class used is Literal 

429 integer = Word(nums) 

430 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

431 

432 date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31'] 

433 

434 

435 # change to Suppress 

436 ParserElement.inline_literals_using(Suppress) 

437 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

438 

439 date_str.parse_string("1999/12/31") # -> ['1999', '12', '31'] 

440 """ 

441 ParserElement._literalStringClass = cls 

442 

443 @classmethod 

444 def using_each(cls, seq, **class_kwargs): 

445 """ 

446 Yields a sequence of class(obj, **class_kwargs) for obj in seq. 

447 

448 Example:: 

449 

450 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") 

451 

452 """ 

453 yield from (cls(obj, **class_kwargs) for obj in seq) 

454 

    class DebugActions(NamedTuple):
        # Optional debug hooks consulted by _parseNoCache when self.debug is set;
        # a None entry means no hook is called for that event.
        # called before parseImpl is attempted
        debug_try: typing.Optional[DebugStartAction]
        # called after a successful match, with the resulting tokens
        debug_match: typing.Optional[DebugSuccessAction]
        # called when parsing or a parse action raises
        debug_fail: typing.Optional[DebugExceptionAction]

459 

460 def __init__(self, savelist: bool = False): 

461 self.parseAction: List[ParseAction] = list() 

462 self.failAction: typing.Optional[ParseFailAction] = None 

463 self.customName: str = None # type: ignore[assignment] 

464 self._defaultName: typing.Optional[str] = None 

465 self.resultsName: str = None # type: ignore[assignment] 

466 self.saveAsList = savelist 

467 self.skipWhitespace = True 

468 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 

469 self.copyDefaultWhiteChars = True 

470 # used when checking for left-recursion 

471 self.mayReturnEmpty = False 

472 self.keepTabs = False 

473 self.ignoreExprs: List["ParserElement"] = list() 

474 self.debug = False 

475 self.streamlined = False 

476 # optimize exception handling for subclasses that don't advance parse index 

477 self.mayIndexError = True 

478 self.errmsg: Union[str, None] = "" 

479 # mark results names as modal (report only last) or cumulative (list all) 

480 self.modalResults = True 

481 # custom debug actions 

482 self.debugActions = self.DebugActions(None, None, None) 

483 # avoid redundant calls to preParse 

484 self.callPreparse = True 

485 self.callDuringTry = False 

486 self.suppress_warnings_: List[Diagnostics] = [] 

487 

488 def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement": 

489 """ 

490 Suppress warnings emitted for a particular diagnostic on this expression. 

491 

492 Example:: 

493 

494 base = pp.Forward() 

495 base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward) 

496 

497 # statement would normally raise a warning, but is now suppressed 

498 print(base.parse_string("x")) 

499 

500 """ 

501 self.suppress_warnings_.append(warning_type) 

502 return self 

503 

504 def visit_all(self): 

505 """General-purpose method to yield all expressions and sub-expressions 

506 in a grammar. Typically just for internal use. 

507 """ 

508 to_visit = deque([self]) 

509 seen = set() 

510 while to_visit: 

511 cur = to_visit.popleft() 

512 

513 # guard against looping forever through recursive grammars 

514 if cur in seen: 

515 continue 

516 seen.add(cur) 

517 

518 to_visit.extend(cur.recurse()) 

519 yield cur 

520 

521 def copy(self) -> "ParserElement": 

522 """ 

523 Make a copy of this :class:`ParserElement`. Useful for defining 

524 different parse actions for the same parsing pattern, using copies of 

525 the original parse element. 

526 

527 Example:: 

528 

529 integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) 

530 integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K") 

531 integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

532 

533 print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M")) 

534 

535 prints:: 

536 

537 [5120, 100, 655360, 268435456] 

538 

539 Equivalent form of ``expr.copy()`` is just ``expr()``:: 

540 

541 integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 

542 """ 

543 cpy = copy.copy(self) 

544 cpy.parseAction = self.parseAction[:] 

545 cpy.ignoreExprs = self.ignoreExprs[:] 

546 if self.copyDefaultWhiteChars: 

547 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 

548 return cpy 

549 

550 def set_results_name( 

551 self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False 

552 ) -> "ParserElement": 

553 """ 

554 Define name for referencing matching tokens as a nested attribute 

555 of the returned parse results. 

556 

557 Normally, results names are assigned as you would assign keys in a dict: 

558 any existing value is overwritten by later values. If it is necessary to 

559 keep all values captured for a particular results name, call ``set_results_name`` 

560 with ``list_all_matches`` = True. 

561 

562 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; 

563 this is so that the client can define a basic element, such as an 

564 integer, and reference it in multiple places with different names. 

565 

566 You can also set results names using the abbreviated syntax, 

567 ``expr("name")`` in place of ``expr.set_results_name("name")`` 

568 - see :class:`__call__`. If ``list_all_matches`` is required, use 

569 ``expr("name*")``. 

570 

571 Example:: 

572 

573 integer = Word(nums) 

574 date_str = (integer.set_results_name("year") + '/' 

575 + integer.set_results_name("month") + '/' 

576 + integer.set_results_name("day")) 

577 

578 # equivalent form: 

579 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

580 """ 

581 listAllMatches = listAllMatches or list_all_matches 

582 return self._setResultsName(name, listAllMatches) 

583 

584 def _setResultsName(self, name, list_all_matches=False) -> "ParserElement": 

585 if name is None: 

586 return self 

587 newself = self.copy() 

588 if name.endswith("*"): 

589 name = name[:-1] 

590 list_all_matches = True 

591 newself.resultsName = name 

592 newself.modalResults = not list_all_matches 

593 return newself 

594 

595 def set_break(self, break_flag: bool = True) -> "ParserElement": 

596 """ 

597 Method to invoke the Python pdb debugger when this element is 

598 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to 

599 disable. 

600 """ 

601 if break_flag: 

602 _parseMethod = self._parse 

603 

604 def breaker(instring, loc, do_actions=True, callPreParse=True): 

605 import pdb 

606 

607 # this call to pdb.set_trace() is intentional, not a checkin error 

608 pdb.set_trace() 

609 return _parseMethod(instring, loc, do_actions, callPreParse) 

610 

611 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined] 

612 self._parse = breaker # type: ignore [assignment] 

613 elif hasattr(self._parse, "_originalParseMethod"): 

614 self._parse = self._parse._originalParseMethod # type: ignore [attr-defined, assignment] 

615 return self 

616 

617 def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> "ParserElement": 

618 """ 

619 Define one or more actions to perform when successfully matching parse element definition. 

620 

621 Parse actions can be called to perform data conversions, do extra validation, 

622 update external data structures, or enhance or replace the parsed tokens. 

623 Each parse action ``fn`` is a callable method with 0-3 arguments, called as 

624 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: 

625 

626 - ``s`` = the original string being parsed (see note below) 

627 - ``loc`` = the location of the matching substring 

628 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object 

629 

630 The parsed tokens are passed to the parse action as ParseResults. They can be 

631 modified in place using list-style append, extend, and pop operations to update 

632 the parsed list elements; and with dictionary-style item set and del operations 

633 to add, update, or remove any named results. If the tokens are modified in place, 

634 it is not necessary to return them with a return statement. 

635 

636 Parse actions can also completely replace the given tokens, with another ``ParseResults`` 

637 object, or with some entirely different object (common for parse actions that perform data 

638 conversions). A convenient way to build a new parse result is to define the values 

639 using a dict, and then create the return value using :class:`ParseResults.from_dict`. 

640 

641 If None is passed as the ``fn`` parse action, all previously added parse actions for this 

642 expression are cleared. 

643 

644 Optional keyword arguments: 

645 

646 - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during 

647 lookaheads and alternate testing. For parse actions that have side effects, it is 

648 important to only call the parse action once it is determined that it is being 

649 called as part of a successful parse. For parse actions that perform additional 

650 validation, then call_during_try should be passed as True, so that the validation 

651 code is included in the preliminary "try" parses. 

652 

653 Note: the default parsing behavior is to expand tabs in the input string 

654 before starting the parsing process. See :class:`parse_string` for more 

655 information on parsing strings containing ``<TAB>`` s, and suggested 

656 methods to maintain a consistent view of the parsed string, the parse 

657 location, and line and column positions within the parsed string. 

658 

659 Example:: 

660 

661 # parse dates in the form YYYY/MM/DD 

662 

663 # use parse action to convert toks from str to int at parse time 

664 def convert_to_int(toks): 

665 return int(toks[0]) 

666 

667 # use a parse action to verify that the date is a valid date 

668 def is_valid_date(instring, loc, toks): 

669 from datetime import date 

670 year, month, day = toks[::2] 

671 try: 

672 date(year, month, day) 

673 except ValueError: 

674 raise ParseException(instring, loc, "invalid date given") 

675 

676 integer = Word(nums) 

677 date_str = integer + '/' + integer + '/' + integer 

678 

679 # add parse actions 

680 integer.set_parse_action(convert_to_int) 

681 date_str.set_parse_action(is_valid_date) 

682 

683 # note that integer fields are now ints, not strings 

684 date_str.run_tests(''' 

685 # successful parse - note that integer fields were converted to ints 

686 1999/12/31 

687 

688 # fail - invalid date 

689 1999/13/31 

690 ''') 

691 """ 

692 if list(fns) == [None]: 

693 self.parseAction.clear() 

694 return self 

695 

696 if not all(callable(fn) for fn in fns): 

697 raise TypeError("parse actions must be callable") 

698 self.parseAction[:] = [_trim_arity(fn) for fn in fns] 

699 self.callDuringTry = kwargs.get( 

700 "call_during_try", kwargs.get("callDuringTry", False) 

701 ) 

702 

703 return self 

704 

705 def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> "ParserElement": 

706 """ 

707 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. 

708 

709 See examples in :class:`copy`. 

710 """ 

711 self.parseAction += [_trim_arity(fn) for fn in fns] 

712 self.callDuringTry = self.callDuringTry or kwargs.get( 

713 "call_during_try", kwargs.get("callDuringTry", False) 

714 ) 

715 return self 

716 

717 def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> "ParserElement": 

718 """Add a boolean predicate function to expression's list of parse actions. See 

719 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``, 

720 functions passed to ``add_condition`` need to return boolean success/fail of the condition. 

721 

722 Optional keyword arguments: 

723 

724 - ``message`` = define a custom message to be used in the raised exception 

725 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise 

726 ParseException 

727 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls, 

728 default=False 

729 

730 Example:: 

731 

732 integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) 

733 year_int = integer.copy() 

734 year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") 

735 date_str = year_int + '/' + integer + '/' + integer 

736 

737 result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), 

738 (line:1, col:1) 

739 """ 

740 for fn in fns: 

741 self.parseAction.append( 

742 condition_as_parse_action( 

743 fn, 

744 message=str(kwargs.get("message")), 

745 fatal=bool(kwargs.get("fatal", False)), 

746 ) 

747 ) 

748 

749 self.callDuringTry = self.callDuringTry or kwargs.get( 

750 "call_during_try", kwargs.get("callDuringTry", False) 

751 ) 

752 return self 

753 

754 def set_fail_action(self, fn: ParseFailAction) -> "ParserElement": 

755 """ 

756 Define action to perform if parsing fails at this expression. 

757 Fail acton fn is a callable function that takes the arguments 

758 ``fn(s, loc, expr, err)`` where: 

759 

760 - ``s`` = string being parsed 

761 - ``loc`` = location where expression match was attempted and failed 

762 - ``expr`` = the parse expression that failed 

763 - ``err`` = the exception thrown 

764 

765 The function returns no value. It may throw :class:`ParseFatalException` 

766 if it is desired to stop parsing immediately.""" 

767 self.failAction = fn 

768 return self 

769 

770 def _skipIgnorables(self, instring: str, loc: int) -> int: 

771 if not self.ignoreExprs: 

772 return loc 

773 exprsFound = True 

774 ignore_expr_fns = [e._parse for e in self.ignoreExprs] 

775 last_loc = loc 

776 while exprsFound: 

777 exprsFound = False 

778 for ignore_fn in ignore_expr_fns: 

779 try: 

780 while 1: 

781 loc, dummy = ignore_fn(instring, loc) 

782 exprsFound = True 

783 except ParseException: 

784 pass 

785 # check if all ignore exprs matched but didn't actually advance the parse location 

786 if loc == last_loc: 

787 break 

788 last_loc = loc 

789 return loc 

790 

791 def preParse(self, instring: str, loc: int) -> int: 

792 if self.ignoreExprs: 

793 loc = self._skipIgnorables(instring, loc) 

794 

795 if self.skipWhitespace: 

796 instrlen = len(instring) 

797 white_chars = self.whiteChars 

798 while loc < instrlen and instring[loc] in white_chars: 

799 loc += 1 

800 

801 return loc 

802 

803 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: 

804 return loc, [] 

805 

806 def postParse(self, instring, loc, tokenlist): 

807 return tokenlist 

808 

809 # @profile 

    def _parseNoCache(
        self, instring, loc, do_actions=True, callPreParse=True
    ) -> Tuple[int, ParseResults]:
        """Attempt to match this expression at ``loc`` in ``instring``.

        Steps, in order: optional ``preParse``; ``parseImpl``; ``postParse``;
        packaging of the tokens as a :class:`ParseResults`; then the parse
        actions, which run only if ``do_actions`` or ``self.callDuringTry`` is
        true. When ``self.debug`` is set the configured debug actions are
        called along the way, and ``self.failAction`` is called if the match
        attempt raises.

        Returns a ``(loc, ret_tokens)`` tuple: the location after the match
        and the resulting tokens.

        An ``IndexError`` from ``parseImpl`` is converted to a
        ``ParseException`` positioned at the end of the string when
        ``mayIndexError`` is set or the start position is at or past the end.
        An ``IndexError`` from a parse action is re-raised as a
        ``ParseException`` chained to it. Other exceptions propagate.
        """
        # NOTE(review): TRY, MATCH and FAIL are not referenced anywhere else in this method
        TRY, MATCH, FAIL = 0, 1, 2
        debugging = self.debug  # and do_actions)
        len_instring = len(instring)

        # slower path: needed only when the attempt must be reported to debug
        # actions or a fail action
        if debugging or self.failAction:
            # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
            try:
                if callPreParse and self.callPreparse:
                    pre_loc = self.preParse(instring, loc)
                else:
                    pre_loc = loc
                tokens_start = pre_loc
                if self.debugActions.debug_try:
                    self.debugActions.debug_try(instring, tokens_start, self, False)
                if self.mayIndexError or pre_loc >= len_instring:
                    try:
                        loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                    except IndexError:
                        raise ParseException(instring, len_instring, self.errmsg, self)
                else:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except Exception as err:
                # print("Exception raised:", err)
                # NOTE(review): tokens_start is assigned only after preParse
                # returns; if preParse itself raises, it looks unbound here - confirm
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                if self.failAction:
                    self.failAction(instring, tokens_start, self, err)
                raise
        else:
            # fast path: same parsing steps without any reporting
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

        tokens = self.postParse(instring, loc, tokens)

        ret_tokens = ParseResults(
            tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
        )
        if self.parseAction and (do_actions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        try:
                            tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                        except IndexError as parse_action_exc:
                            exc = ParseException("exception raised in parse action")
                            raise exc from parse_action_exc

                        # a parse action that returns a new object replaces the
                        # tokens; returning None or the same object keeps them
                        if tokens is not None and tokens is not ret_tokens:
                            ret_tokens = ParseResults(
                                tokens,
                                self.resultsName,
                                asList=self.saveAsList
                                and isinstance(tokens, (ParseResults, list)),
                                modal=self.modalResults,
                            )
                except Exception as err:
                    # print "Exception raised in user parse action:", err
                    if self.debugActions.debug_fail:
                        self.debugActions.debug_fail(
                            instring, tokens_start, self, err, False
                        )
                    raise
            else:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns a new object replaces the
                    # tokens; returning None or the same object keeps them
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
        if debugging:
            # print("Matched", self, "->", ret_tokens.as_list())
            if self.debugActions.debug_match:
                self.debugActions.debug_match(
                    instring, tokens_start, loc, self, ret_tokens, False
                )

        return loc, ret_tokens

911 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the location that follows it.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param raise_fatal: when true, a ``ParseFatalException`` is re-raised unchanged;
        otherwise it is replaced by a ``ParseException`` built from this element's
        ``errmsg`` at ``loc``
    :param do_actions: forwarded to the underlying ``_parse`` call
    :returns: the first item of the ``_parse`` result
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return outcome[0]

926 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    :returns: ``True`` if ``try_parse`` succeeds, ``False`` if it raises
        ``ParseException`` or ``IndexError``
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
        return True
    except (ParseException, IndexError):
        return False

934 

# memo table (and a lock declared with it) for left-recursion in Forward references
recursion_lock = RLock()
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool],
    Tuple[int, Union[ParseResults, Exception]],
] = {}

940 

class _CacheType(dict):
    """
    ``dict`` subclass that exists to help type checking of the cache object.
    """

    # annotation only; no value is assigned in this class
    not_in_cache: bool

    def get(self, *args):
        """Stub with no behavior; returns ``None``."""
        return None

    def set(self, *args):
        """Stub with no behavior; returns ``None``."""
        return None

951 

# Argument cache that optimizes repeated calls made while backtracking through
# recursive expressions. The real cache is installed later by enable_packrat();
# this placeholder is here so that reset_cache() doesn't fail before then.
packrat_cache = _CacheType()
packrat_cache_lock = RLock()
# two counters, indexed by _parseCache as [hits, misses]
packrat_cache_stats = [0, 0]

958 

959 # this method gets repeatedly called during backtracking with the same arguments - 

960 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 

def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Memoizing wrapper around ``_parseNoCache``.

    Results and ``ParseBaseException`` failures are stored in
    ``ParserElement.packrat_cache`` under the key
    ``(self, instring, loc, callPreParse, do_actions)``. The whole lookup runs while
    holding ``ParserElement.packrat_cache_lock``.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param do_actions: forwarded to ``_parseNoCache``
    :param callPreParse: forwarded to ``_parseNoCache``
    :returns: tuple of the location following the match and the resulting tokens;
        on a cache hit the tokens are a copy of the cached ones
    :raises ParseBaseException: freshly raised on a miss, or the cached instance on a hit
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # stored as (location after match, tokens copy, requested location)
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            # The debug hooks are called with a cache_hit keyword; a TypeError from
            # a hook is deliberately ignored.
            # NOTE(review): presumably this tolerates hooks that do not accept the
            # cache_hit keyword - confirm.
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(Tuple[int, ParseResults, int], value)
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # report (start, end) in the same order as the uncached path
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result

1009 

# default parse entry point is the uncached implementation
_parse = _parseNoCache

1011 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache and the recursion memos, and zero every packrat counter.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice assignment keeps the same list object
    stats[:] = [0 for _ in stats]
    ParserElement.recursion_memos.clear()

1019 

# flags recording which memoization mode, if any, has been enabled
_packratEnabled = False
_left_recursion_enabled = False

1022 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing and discard what they memoized.

    Calling this when neither mode is enabled also works, so it can safely be used
    to clear any previous settings before activating Packrat or Left Recursion.
    """
    ParserElement.reset_cache()
    ParserElement._parse = ParserElement._parseNoCache
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False

1036 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Switch on "bounded recursion" parsing, which permits direct and indirect
    left-recursion. While parsing, a left-recursive :class:`Forward` element is
    matched repeatedly with a fixed recursion depth that is raised step by step
    until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse actions
    may not be re-evaluated during backtracking. Programs whose parse actions rely
    on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing is similar to, but not the same as, Packrat parsing,
    so the two cannot be combined. Pass ``force=True`` to disable any previous,
    conflicting settings.

    :raises RuntimeError: if Packrat is enabled and ``force`` is not set
    :raises NotImplementedError: if ``cache_size_limit`` is an integer that is not
        greater than zero
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo_table = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo_table = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError(f"Memo size of {cache_size_limit}")

    ParserElement.recursion_memos = memo_table  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True

1084 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Switch on "packrat" parsing, which memoizes the parsing logic. A repeated
    parse attempt at the same string location (common in complex grammars) can
    return a cached value right away instead of re-running parsing/validating
    code. Both successful results and parsing exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the size of
      the packrat cache; ``None`` makes the cache unbounded; ``0`` effectively
      disables the cache.

    The speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is first
    imported. To turn it on, a program must call the class method
    :class:`ParserElement.enable_packrat`. For best results, call
    ``enable_packrat()`` immediately after importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar to, but not the same as, Bounded Recursion parsing,
    so the two cannot be combined. Pass ``force=True`` to disable any previous,
    conflicting settings.

    :raises RuntimeError: if Bounded Recursion is enabled and ``force`` is not set
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    # already enabled: leave the existing cache in place
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    unbounded = cache_size_limit is None
    ParserElement.packrat_cache = (
        _UnboundedCache() if unbounded else _FifoCache(cache_size_limit)
    )  # type: ignore[assignment]
    ParserElement._parse = ParserElement._parseCache

1133 

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string according to this parser definition. This is meant to be the
    primary entry point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    When the input string must match the entire grammar, set the ``parse_all`` flag to ``True``. Ending the
    grammar with a :class:`StringEnd` expression is equivalent.

    So that column numbers are reported properly, ``parse_string`` works on a copy of the input string in which
    all tabs have been converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If
    the input string contains tabs and the grammar uses parse actions that index into the parsed string with
    the ``loc`` argument, a consistent view of the input string can be ensured in one of these ways:

    - call ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action with the full ``(s,loc,toks)`` signature, and reference the input string through
      the parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_consume_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse(instring, 0)
        if must_consume_all:
            # after the match, only the end of the string may follow
            loc = self.preParse(instring, loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, loc)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
    return tokens

1202 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    :param instring: the string to scan
    :param max_matches: stop after this many matches have been yielded
    :param overlap: if true, overlapping matches are reported
    :param debug: if true, print a dict of tokens, start and end for each match
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of the two is used
    :returns: generator of ``(tokens, start, end)`` tuples

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # if preParse skips anything at loc, jump to the end of the
                        # match; otherwise advance a single character
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past loc; step forward
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1293 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text using the modified
    tokens a parse action may return. To use ``transform_string``, define a grammar
    and attach a parse action to it that modifies the returned token list. Calling
    ``transform_string()`` on a target string then scans for matches and replaces
    the matched text according to the logic in the parse action.

    Note that this method sets ``keepTabs`` on this element and does not reset it.

    :param instring: the string to transform
    :param debug: forwarded to ``scan_string``
    :returns: the resulting transformed string

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # text between the previous match and this one is copied as-is
            pieces.append(instring[prev_end:start])
            prev_end = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces += toks.as_list()
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[prev_end:])
        kept = [piece for piece in pieces if piece]
        return "".join([str(item) for item in _flatten(kept)])
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1343 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, which collects just the tokens of
    every match of this parse expression. The optional ``max_matches`` argument
    clips searching after 'n' matches are found.

    :param instring: the string to search
    :param max_matches: maximum number of matches to collect
    :param debug: forwarded to ``scan_string``
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of the two is used
    :returns: a :class:`ParseResults` holding the tokens of each match

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            toks for toks, _start, _end in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1383 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator method that splits a string, using this expression as the separator.
    The optional ``maxsplit`` argument limits the number of splits, and the optional
    ``include_separators`` argument (default= ``False``) controls whether the
    separating matching text is included in the split results.

    :param instring: the string to split
    :param maxsplit: passed to ``scan_string`` as ``max_matches``
    :param include_separators: if true, also yield the first token of each separator match
    :param includeSeparators: pre-PEP8 spelling of ``include_separators``

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator
    yield instring[cursor:]

1415 

def __add__(self, other) -> "ParserElement":
    """
    Implementation of the ``+`` operator - returns :class:`And`. A string added to a
    :class:`ParserElement` is converted with this element's ``_literalStringClass``.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1451 

def __radd__(self, other) -> "ParserElement":
    """
    Implementation of the ``+`` operator for a left operand that is not a
    :class:`ParserElement`. A leading ``...`` becomes a :class:`SkipTo` this
    element, named ``"_skipped*"``, followed by this element.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1464 

def __sub__(self, other) -> "ParserElement":
    """
    Implementation of the ``-`` operator; returns an :class:`And` with an error stop
    placed between the two operands.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1474 

def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of the ``-`` operator for a left operand that is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1484 

def __mul__(self, other) -> "ParserElement":
    """
    Implementation of the ``*`` operator, so that ``expr * 3`` can be written in
    place of ``expr + expr + expr``. An expression may also be multiplied by a
    2-integer tuple, similar to ``{min, max}`` multipliers in regular expressions.
    Tuples may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    :raises ValueError: for a negative count, or a tuple whose second value is
        smaller than its first
    """
    # normalize the Ellipsis spellings into tuple form
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        required, optional = other, 0
    else:
        bounds = tuple(None if item is Ellipsis else item for item in other)
        lower, upper = (bounds + (None, None))[:2]
        if lower is None:
            lower = 0
        if isinstance(lower, int) and upper is None:
            # open-ended repetition
            if lower == 0:
                return ZeroOrMore(self)
            if lower == 1:
                return OneOrMore(self)
            return self * lower + ZeroOrMore(self)
        if isinstance(lower, int) and isinstance(upper, int):
            required, optional = lower, upper - lower
        else:
            return NotImplemented

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    def nested_optional(count):
        # builds Opt(self + Opt(self + ...)) nested ``count`` levels deep
        if count > 1:
            return Opt(self + nested_optional(count - 1))
        return Opt(self)

    if required == 1:
        head = self
    elif required:
        head = And([self] * required)
    else:
        head = None

    if not optional:
        return head
    tail = nested_optional(optional)
    return tail if head is None else head + tail

1564 

def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of the ``*`` operator for a left operand that is not a
    :class:`ParserElement`; delegates to ``__mul__``.
    """
    product = self.__mul__(other)
    return product

1567 

def __or__(self, other) -> "ParserElement":
    """
    Implementation of the ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a pending skip with ``must_skip`` set, and ``expr | ""``
    returns ``Opt(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    alternative = other
    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        alternative = self._literalStringClass(other)
    if isinstance(alternative, ParserElement):
        return MatchFirst([self, alternative])
    return NotImplemented

1583 

def __ror__(self, other) -> "ParserElement":
    """
    Implementation of the ``|`` operator for a left operand that is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1593 

def __xor__(self, other) -> "ParserElement":
    """
    Implementation of the ``^`` operator - returns :class:`Or`.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1603 

def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of the ``^`` operator for a left operand that is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1613 

def __and__(self, other) -> "ParserElement":
    """
    Implementation of the ``&`` operator - returns :class:`Each`.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1623 

def __rand__(self, other) -> "ParserElement":
    """
    Implementation of the ``&`` operator for a left operand that is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1633 

def __invert__(self) -> "ParserElement":
    """
    Implementation of the ``~`` operator - wraps this element in a :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1639 

# __iter__ is disabled to override the legacy behavior of iterating over a
# sequence through sequential access to __getitem__
__iter__ = None

1643 

def __getitem__(self, key):
    """
    ``[]`` indexing notation is a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` instances of ``expr`` exist in the input stream. If this
    behavior is desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    :raises TypeError: if more than two index arguments are given
    """
    has_stop = False
    stop_expr = NoMatch()
    if isinstance(key, slice):
        # expr[count: end_expr]
        key, stop_expr = key.start, key.stop
        if key is None:
            key = ...
        has_stop = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max: end_expr]
        key, stop_expr = (key[0], key[1].start), key[1].stop
        has_stop = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop:
        repeated.stopOn(stop_expr)

    return repeated

1703 

def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    A ``name`` given with a trailing ``'*'`` character causes ``list_all_matches``
    to be passed as ``True``.

    Omitting ``name`` is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()

    return self._setResultsName(name)

1723 

def suppress(self) -> "ParserElement":
    """
    Wrap this :class:`ParserElement` in a :class:`Suppress` so that its output is
    suppressed; useful to keep punctuation from cluttering up returned output.
    """
    silenced = Suppress(self)
    return silenced

1730 

def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on the skipping of whitespace before the characters of this
    :class:`ParserElement`'s defined pattern are matched.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
        this implementation itself does not read the argument
    :returns: this element
    """
    self.skipWhitespace = True
    return self

1740 

def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off the skipping of whitespace before the characters of this
    :class:`ParserElement`'s defined pattern are matched. This is normally only
    used internally by the pyparsing module, but may be needed in some
    whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any);
        this implementation itself does not read the argument
    :returns: this element
    """
    self.skipWhitespace = False
    return self

1751 

def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Overrides the default whitespace chars for this element.

    Whitespace skipping is switched on, ``chars`` is stored as a ``set`` in
    ``whiteChars``, and ``copy_defaults`` is recorded in
    ``copyDefaultWhiteChars``. Returns ``self`` so calls can be chained.
    """
    self.whiteChars = set(chars)
    self.copyDefaultWhiteChars = copy_defaults
    self.skipWhitespace = True
    return self

1762 

def parse_with_tabs(self) -> "ParserElement":
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before
    parsing the input string, by setting ``keepTabs``.
    Must be called before ``parse_string`` when the input grammar contains
    elements that match ``<TAB>`` characters.

    Returns ``self`` so calls can be chained.
    """
    self.keepTabs = True
    return self

1771 

def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Register an expression to be ignored (e.g., comments) while doing
    pattern matching; may be called repeatedly, to define multiple comment
    or other ignorable patterns.

    A plain string is wrapped in :class:`Suppress`. A :class:`Suppress`
    expression is added as-is unless it is already registered; any other
    expression is copied and wrapped in a new :class:`Suppress`.

    Example::

        patt = Word(alphas)[...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        # wrap a private copy so the caller's expression is left untouched
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self

1797 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching,
    and enable debugging on this expression. Any action passed as a falsy
    value (such as ``None``) is replaced by the corresponding default action.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]

    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = True
    return self

1823 

def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Enable or disable display of debugging messages while doing pattern matching.
    Pass ``flag`` as ``True`` to enable, ``False`` to disable.
    Pass ``recurse`` as ``True`` to apply the flag to this expression and all sub-expressions.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if recurse:
        # delegate to every visited expression, one level at a time
        for expr in self.visit_all():
            expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling debug installs the default actions (which also sets self.debug)
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

1876 

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression; built on first access by
    ``_generateDefaultName()`` and cached in ``_defaultName``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1882 

1883 @abstractmethod 

1884 def _generateDefaultName(self) -> str: 

1885 """ 

1886 Child classes must define this method, which defines how the ``default_name`` is set. 

1887 """ 

1888 

def set_name(self, name: typing.Optional[str]) -> "ParserElement":
    """
    Assign a name to this expression, to make debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example::

        integer = Word(nums)
        integer.parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)

        integer.set_name("integer")
        integer.parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # the error message must be rebuilt after customName changes, since
    # str(self) reflects the new name
    self.errmsg = "Expected " + str(self)

    if __diag__.enable_debug_on_named_expressions:
        has_name = name is not None
        self.set_debug(has_name)

    return self

1913 

@property
def name(self) -> str:
    """
    Name of this expression: the user-defined name when one has been set,
    otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is not None:
        return custom
    return self.default_name

@name.setter
def name(self, new_name) -> None:
    # assignment goes through set_name() so its side effects also apply
    self.set_name(new_name)

1922 

1923 def __str__(self) -> str: 

1924 return self.name 

1925 

1926 def __repr__(self) -> str: 

1927 return str(self) 

1928 

def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard the cached default
    name so it is regenerated on next use. Returns ``self``.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1933 

def recurse(self) -> List["ParserElement"]:
    """Return this expression's sub-expressions; this implementation has none."""
    no_children: List["ParserElement"] = []
    return no_children

1936 

1937 def _checkRecursion(self, parseElementList): 

1938 subRecCheckList = parseElementList[:] + [self] 

1939 for e in self.recurse(): 

1940 e._checkRecursion(subRecCheckList) 

1941 

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: emits a :class:`DeprecationWarning` on every call.
    ``validateTrace`` is accepted but not used.
    """
    deprecation_msg = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

1952 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method, whose
    contents are read directly, or a filename, in which case the file is
    opened with ``encoding``, read in full, and closed before parsing.
    ``parse_all`` (or its synonym ``parseAll``) is forwarded to
    ``parse_string``.
    """
    parseAll = parseAll or parse_all

    try:
        # duck-typing: anything with .read() is treated as an open file
        file_contents = typing.cast(TextIO, file_or_filename).read()
    except AttributeError:
        filename = typing.cast(str, file_or_filename)
        with open(filename, "r", encoding=encoding) as source:
            file_contents = source.read()

    try:
        return self.parse_string(file_contents, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1982 

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object is always equal
    - a string is equal if this expression matches it in full
    - another :class:`ParserElement` is equal if both have equal instance attributes
    - anything else is unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

1991 

1992 def __hash__(self): 

1993 return id(self) 

1994 

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of whether this parser accepts a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
      (converted with ``str()`` before parsing)
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    Returns ``True`` if parsing succeeds, ``False`` if a
    ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    # either spelling set to False turns off the parse-all requirement
    parseAll = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=parseAll)
    except ParseBaseException:
        return False
    else:
        return True

2018 

def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string (or a ``ParseResults``, which is dumped)
      to be added to the test output; if it returns ``None``, the parsed results are dumped instead
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword arguments (``parseAll``, ``fullDump``, ``printResults``,
    ``failureTests``, ``postParse``) are synonyms for the snake_case parameters.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` tuples, where ``result`` is the ``ParseResults`` of a
    successful parse or the exception that was raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge snake_case arguments with their camelCase synonyms
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse

    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]

    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)

    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    comments: List[str] = []
    success = True
    # converts a literal backslash-n in a test line into a real newline,
    # except inside quoted strings
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # a comment line, or a blank line following comments, is collected
        # and shown above the next test
        if (comment_specified and comment.matches(t, False)) or (comments and not t):
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # honor full_dump here too, as the other dump paths do
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # not every callable has __name__ (e.g. functools.partial);
                    # fall back to repr so reporting the failure cannot itself fail
                    pp_name = getattr(postParse, "__name__", repr(postParse))
                    out.append(f"{pp_name} failed: {type(e).__name__}: {e}")
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2222 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Generate a railroad diagram for the parser and write it out as HTML.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML; a str or Path is opened for writing as UTF-8, anything else is
      written to through its ``write()`` method
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # we were given a path: open it, then render into it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
        return

    # we were passed a file-like object, just write to it
    output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2278 

# Compatibility synonyms: camelCase names that wrap the snake_case
# methods defined above via replaced_by_pep8()
# fmt: off

# synonyms for static methods (wrapped again in staticmethod)
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8("setDefaultWhitespaceChars", set_default_whitespace_chars))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# synonyms for instance methods
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)

# the property is aliased directly
defaultName = default_name
# fmt: on

2313 

2314 

class _PendingSkip(ParserElement):
    # Internal placeholder class marking the spot where '...' was added to a
    # parser element; once another ParserElement is added, this placeholder
    # is replaced with a SkipTo.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render as "<anchor> ..." by naming a temporary And with an Empty
        combined_name = str(self.anchor + Empty())
        return combined_name.replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the
            # placeholder token and its results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the anchor was absent: replace an empty skip with a note
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never combined with a target cannot parse
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2354 

2355 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # default name is simply the concrete class name
        cls_name = type(self).__name__
        return cls_name

2366 

2367 

class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditionally fail at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2381 

2382 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # subclasses are always created as themselves
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        match_string = matchString or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)

        # Default behavior
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        literal_text = matchString or match_string
        self.match = literal_text
        self.matchLen = len(literal_text)
        self.firstMatchChar = literal_text[:1]
        # self.name depends on self.match, so errmsg is built after it is set
        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # cheap first-character test before the full startswith comparison
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2434 

2435 

class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString=""):
        # both arguments are accepted for signature compatibility but
        # ignored; the literal text is always the empty string
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # consume nothing and produce no tokens
        no_tokens = []
        return loc, no_tokens

2451 

2452 

class _SingleCharLiteral(Literal):
    # Literal variant for one-character strings: a single character
    # comparison replaces the startswith() check.
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match


# register Literal as ParserElement's literal string class
ParserElement._literalStringClass = Literal

2461 

2462 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Raises ``ValueError`` if constructed with an empty keyword string.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            # the IndexError is an implementation detail; do not chain it
            raise ValueError(
                "null string passed to Keyword; use Empty() instead"
            ) from None
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case on both sides
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that the characters just
        before and just after it are not keyword characters.

        Returns ``(loc + matchLen, match)`` on success; otherwise raises
        ``ParseException``, located at the offending neighbor character when
        the text matched but was not delimited as a keyword.
        """
        errmsg = self.errmsg
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match

                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen] not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2579 

2580 

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        defining_text = matchString or match_string
        # the parent stores the upper-cased form for comparison
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        candidate = instring[loc : loc + self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString

2606 

2607 

class CaselessKeyword(Keyword):
    """
    Case-insensitive form of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # camelCase keyword arguments, when given, take precedence over
        # their snake_case counterparts; matching is delegated to Keyword
        # with caseless=True
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2631 

2632 

class CloseMatch(Token):
    """A variation on :class:`Literal` which accepts "close" matches,
    i.e. input text that differs from the match string in at most 'n'
    character positions.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    A successful parse returns the matched text from the input string,
    together with these named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    An empty ``mismatches`` list means the input matched exactly.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # an explicitly given snake_case value overrides the camelCase one
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Build the default name from the class name and match string."""
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the input at ``loc`` with the match string, character by
        character, collecting mismatch positions.

        Raises :class:`ParseException` when too little input remains or
        when the number of mismatches exceeds ``maxMismatches``.
        """
        start = loc
        pattern = self.match_string
        stop = start + len(pattern)

        # not enough input left to hold the whole match string
        if stop > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        limit = self.maxMismatches
        fold_case = self.caseless
        mismatches = []
        pos = 0

        for pos, (src, mat) in enumerate(zip(instring[start:stop], pattern)):
            if fold_case:
                src, mat = src.lower(), mat.lower()

            if src == mat:
                continue

            mismatches.append(pos)
            if len(mismatches) > limit:
                # too many differences - report failure at the start position
                raise ParseException(instring, loc, self.errmsg, self)

        # end location is one past the last compared position
        loc = start + pos + 1
        results = ParseResults([instring[start:loc]])
        results["original"] = pattern
        results["mismatches"] = mismatches
        return loc, results

2718 

2719 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0,
      meaning no maximum)
    - ``exact`` - exact number of characters to match (default=0,
      meaning no exact length)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # camelCase keyword arguments, when given, take precedence
        lead_src = initChars or init_chars
        body_src = bodyChars or body_chars
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__()
        if not lead_src:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        lead_set = set(lead_src)
        if excluded:
            # strip excluded characters from both the leading and body sets
            excluded_set = set(excluded)
            lead_set -= excluded_set
            if body_src:
                body_src = "".join(set(body_src) - excluded_set)
        self.initChars = lead_set
        self.initCharsOrig = "".join(sorted(lead_set))

        if body_src:
            self.bodyChars = set(body_src)
            self.bodyCharsOrig = "".join(sorted(body_src))
        else:
            # no separate body characters: body shares the leading set
            self.bodyChars = lead_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            min = max = exact
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = keyword_mode
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word; a space in either
        # character set rules that out
        if " " in (self.initChars | self.bodyChars):
            return

        if len(self.initChars) == 1:
            re_leading_fragment = re.escape(self.initCharsOrig)
        else:
            re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

        if self.bodyChars == self.initChars:
            # one character class, repeated
            if max == 0 and self.minLen == 1:
                repeat = "+"
            elif max == 1:
                repeat = ""
            elif self.minLen != self.maxLen:
                upper = "" if self.maxLen == _MAX_INT else self.maxLen
                repeat = f"{{{self.minLen},{upper}}}"
            else:
                repeat = f"{{{self.minLen}}}"
            self.reString = f"{re_leading_fragment}{repeat}"
        elif max == 1:
            # a single character can only come from the leading set
            self.reString = f"{re_leading_fragment}"
        else:
            # leading class followed by a repeated body class; the body
            # counts are one less since the leading char is matched first
            re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
            if max == 0 and self.minLen == 1:
                repeat = "*"
            elif max == 2:
                repeat = "?" if min <= 1 else ""
            else:
                lower = min - 1 if min > 0 else ""
                if min != max:
                    upper = max - 1 if max > 0 else ""
                    repeat = f"{{{lower},{upper}}}"
                else:
                    repeat = f"{{{lower}}}"
            self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

        if self.asKeyword:
            self.reString = rf"\b{self.reString}\b"

        try:
            self.re = re.compile(self.reString)
        except re.error:
            # keep the character-by-character parseImpl
            self.re = None  # type: ignore[assignment]
        else:
            self.re_match = self.re.match
            self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]

    def _generateDefaultName(self) -> str:
        """Build a default name of the form ``W:(chars)`` with an optional
        length specification appended."""

        def charsAsStr(s):
            # collapse to range notation and truncate long results
            max_repr_len = 16
            collapsed = _collapse_string_to_ranges(s, re_escape=False)
            if len(collapsed) <= max_repr_len:
                return collapsed
            return collapsed[: max_repr_len - 3] + "..."

        if self.initChars == self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"

        # add length specification
        shortest, longest = self.minLen, self.maxLen
        if shortest <= 1 and longest == _MAX_INT:
            return base
        if shortest == longest:
            # a single-character word drops the leading "W:" prefix
            return base[2:] if shortest == 1 else base + f"{{{shortest}}}"
        if longest == _MAX_INT:
            return base + f"{{{shortest},...}}"
        return base + f"{{{shortest},{longest}}}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Character-by-character matcher, used when no regex was installed
        by ``__init__``."""
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        # reject when the word is too short, when an explicit max was given
        # and more body characters follow, or when keyword matching was
        # requested and the word is adjacent to another body character
        rejected = (
            loc - start < self.minLen
            or (self.maxSpecified and loc < instrlen and instring[loc] in body_chars)
            or (
                self.asKeyword
                and (
                    (start > 0 and instring[start - 1] in body_chars)
                    or (loc < instrlen and instring[loc] in body_chars)
                )
            )
        )
        if rejected:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed by ``__init__`` when the word
        definition could be compiled to a regular expression."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.group()

2969 

2970 

class Char(Word):
    """Shorthand for :class:`Word` ``(characters, exact=1)``: matches
    exactly one character taken from the given string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # camelCase keyword arguments, when given, take precedence
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )

2991 

2992 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        A string pattern is not compiled here; compilation happens on first
        access of the ``re`` property. An object exposing ``pattern`` and
        ``match`` attributes is accepted as an already-compiled expression.

        Raises ``ValueError`` for an empty string pattern, and ``TypeError``
        when ``pattern`` is neither a string nor a compiled-RE-like object.
        """
        super().__init__()
        # ``mayReturnEmpty`` is provided by a cached_property on this class.
        # cached_property is a non-data descriptor, so a plain instance
        # attribute with the same name takes precedence over it.
        # NOTE(review): presumably a base-class __init__ stores a default
        # ``mayReturnEmpty`` on the instance - confirm. Dropping any such
        # entry lets the pattern-derived value be computed on first access;
        # this is a no-op when no such attribute was stored.
        self.__dict__.pop("mayReturnEmpty", None)

        # camelCase keyword arguments, when given, take precedence
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compile lazily, see the ``re`` property
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled expression (stdlib ``re`` or compatible)
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the result style by rebinding parseImpl on the instance;
        # as_match wins when both flags are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]

    @cached_property
    def re(self) -> _RePattern:
        """The compiled expression; a string pattern is compiled on first
        access.

        Raises ``ValueError`` (chained to the underlying ``re.error``) when
        the pattern cannot be compiled.
        """
        if self._re:
            return self._re

        try:
            return re.compile(self.pattern, self.flags)
        except re.error as err:
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from err

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """Bound ``match`` method of the compiled expression, called as
        ``re_match(instring, loc)``."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self) -> bool:
        """``True`` when the expression matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        """Build the default name ``Re:(<pattern>)``, with doubled
        backslashes in the pattern collapsed to single ones."""
        unescaped = self.pattern.replace("\\\\", "\\")
        return f"Re:({unescaped!r})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc`` and return the matched text as
        :class:`ParseResults`, with any named groups added as named
        results."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return the tuple of the match's groups."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Match at ``loc`` and return the match object itself."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or when
        ``repl`` is a callable and the expression uses ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            # tokens[0] is the match object; expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # tokens[0] is the matched text; substitute within it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3148 

3149 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # escaped-whitespace source text -> the whitespace character it denotes
    ws_map = {r"\t": "\t", r"\n": "\n", r"\f": "\f", r"\r": "\r"}

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        super().__init__()
        # merge camelCase and snake_case forms: value-like arguments prefer
        # the camelCase one when given, boolean flags must be true in both
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is not None:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")
        else:
            end_quote_char = quote_char

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: List[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        closing = self.end_quote_char
        if len(closing) > 1:
            # accept a proper prefix of a multi-character end quote as long
            # as it is not followed by the rest of the end quote
            partial_ends = [
                f"(?:{re.escape(closing[:i])}(?!{re.escape(closing[i:])}))"
                for i in range(len(closing) - 1, 0, -1)
            ]
            inner_pattern.append("(?:" + "|".join(partial_ends) + ")")

        # character class of ordinary content: anything except the first
        # end-quote character and the escape character; when not multiline,
        # newline and carriage return are excluded too
        closing_first = _escape_regex_range_chars(closing[0])
        esc_in_class = (
            _escape_regex_range_chars(self.esc_char) if self.has_esc_char else ''
        )
        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(rf"(?:[^{closing_first}{esc_in_class}])")
        else:
            inner_pattern.append(rf"(?:[^{closing_first}\n\r{esc_in_class}])")

        self.pattern = (
            f"{re.escape(self.quote_char)}"
            f"(?:{'|'.join(inner_pattern)})*"
            f"{re.escape(self.end_quote_char)}"
        )

        if self.unquote_results:
            # scanner used by parseImpl to undo escapes: an escaped
            # character, or else any single character ...
            scan_source = rf"({re.escape(self.esc_char)}.)" rf"|(\n|.)"
            if self.convert_whitespace_escapes:
                # ... preceded by a group for the whitespace escapes
                ws_alternatives = '|'.join(re.escape(k) for k in self.ws_map)
                scan_source = rf"({ws_alternatives})|" + scan_source
            self.unquote_scan_re = re.compile(scan_source, flags=self.re_flags)
        # fmt: on

        try:
            compiled = re.compile(self.pattern, self.re_flags)
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")
        self.re = compiled
        self.reString = self.pattern
        self.re_match = compiled.match

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        """Describe the expression by its quote delimiters."""
        same_delimiters = self.quote_char == self.end_quote_char
        if same_delimiters and isinstance(self.quote_char, str_type):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``; when ``unquote_results`` is set,
        strip the delimiters and undo escapes in the returned text."""
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        if instring[loc] != self.first_quote_char:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if not self.unquote_results:
            return loc, ret

        # strip off quotes
        ret = ret[self.quote_char_len : -self.end_quote_char_len]

        if not isinstance(ret, str_type):
            return loc, ret

        # as we iterate over matches in the input string, collect from
        # whichever match group of the unquote_scan_re regex matched
        # (only 1 group will match at any given time)
        pieces = []
        if self.convert_whitespace_escapes:
            for match in self.unquote_scan_re.finditer(ret):
                ws_escape, escaped, plain = match.group(1), match.group(2), match.group(3)
                if ws_escape:
                    # \t, \n, etc. -> actual whitespace
                    pieces.append(self.ws_map[ws_escape])
                elif escaped:
                    # escaped character -> the character itself
                    pieces.append(escaped[-1])
                else:
                    pieces.append(plain)
        else:
            for match in self.unquote_scan_re.finditer(ret):
                escaped, plain = match.group(1), match.group(2)
                # escaped character -> the character itself
                pieces.append(escaped[-1] if escaped else plain)
        ret = "".join(pieces)

        # replace escaped quotes
        if self.esc_quote:
            ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3370 

3371 

class CharsNotIn(Token):
    """Token for matching words made up of characters *not* in a given
    set (whitespace is included in the matched characters unless it is
    listed in the exclusion set - see example). Defined with a string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        """Build the default name ``!W:(chars)``; the characters are
        truncated when their collapsed range form is longer than 16."""
        shown = self.notChars
        if len(_collapse_string_to_ranges(shown)) > 16:
            shown = f"{shown[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume characters from ``loc`` until an excluded character, the
        maximum length, or the end of input is reached."""
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc = start + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3450 

3451 

class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is provided
    for cases where some whitespace structures are significant. Define
    with a string containing the whitespace characters to be matched;
    default is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when generating the default expression name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # only the known whitespace characters that are NOT being matched
        # are passed on as this expression's whitespace characters
        unmatched = [c for c in self.whiteStrs if c not in self.matchWhite]
        self.set_whitespace_chars("".join(unmatched), copy_defaults=True)
        # self.leave_whitespace()
        self.mayReturnEmpty = True
        self.errmsg = f"Expected {self.name}"

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        """Concatenate the display names of the matched whitespace
        characters."""
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        return "".join(labels)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume matching whitespace from ``loc`` up to the maximum
        length; fail when fewer than the minimum were found."""
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc = start + 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3527 

3528 

class PositionToken(Token):
    """Common base for the position-testing tokens (``GoToColumn``,
    ``LineStart``, ``LineEnd``, ``StringStart``, ``StringEnd``,
    ``WordStart``, ``WordEnd``).

    Marks the token as possibly returning empty and as not raising
    ``IndexError`` from its parse implementation.
    """

    def __init__(self):
        super().__init__()
        self.mayReturnEmpty, self.mayIndexError = True, False

3534 

3535 

class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        """Remember the target column number ``colno``."""
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Advance over ignorables and whitespace until the target column.

        Returns ``loc`` unchanged if it is already at the target column.
        Otherwise skips any ignore expressions, then steps over whitespace
        characters and stops at the target column, at the first
        non-whitespace character, or at the end of the input.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < end:
            if not instring[loc].isspace() or col(loc, instring) == target:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Consume the text between ``loc`` and the target column.

        Raises ``ParseException`` if ``loc`` is already past the target
        column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]

3568 

3569 

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # Keep a copy of the whitespace set as it was before "\n" is
        # removed; preParse consults it to decide whether newlines may be
        # skipped.
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newline
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        """Skip leading whitespace, stepping over newlines only if ``"\n"``
        was in the whitespace set captured at construction."""
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" in self.orig_whiteChars:
            # slice rather than index, so running off the end is harmless
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3617 

3618 

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # a newline is what this token matches, so it must not be skipped
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a newline at ``loc``, or the end of the input.

        Returns ``(loc + 1, "\\n")`` for a newline and ``(loc + 1, [])``
        when ``loc`` equals the input length. Anything else raises
        ``ParseException``.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3640 

3641 

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or at the position that
        pre-parsing from 0 would reach (only whitespace and ignorables
        precede ``loc``)."""
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

3657 

3658 

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at or beyond the end of
        ``instring``; raise ``ParseException`` while input remains.

        Exactly at the end the returned location is ``loc + 1``; past the
        end it is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end
        return loc, []

3677 

3678 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy keyword ``wordChars`` wins only when it was changed
        # from its default
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at position 0, or where the previous
        character is not a word character and the current one is."""
        if loc == 0:
            return loc, []

        chars = self.wordChars
        if instring[loc - 1] in chars or instring[loc] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3703 

3704 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy keyword ``wordChars`` wins only when it was changed
        # from its default
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens when ``loc`` is at the end of a word.

        Inside the input, the match requires that the character at ``loc``
        is not a word character and that the preceding character is one.
        At or beyond the end of the input (including an empty input) the
        match always succeeds.

        Raises ``ParseException`` otherwise. That includes ``loc == 0`` on
        a non-empty input: there is no preceding character, so no word can
        end there.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # Guard loc == 0 explicitly: instring[loc - 1] would be
                # instring[-1], the LAST character, and could falsely
                # report a word ending at the start of the input.
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3730 

3731 

class Tag(Token):
    """
    A meta-element for inserting a named result into the parsed
    tokens that may be checked later in a parse action or while
    processing the parsed results. Accepts an optional tag value,
    defaulting to `True`.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True
    """

    def __init__(self, tag_name: str, value: Any = True):
        super().__init__()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.leave_whitespace()
        self.tag_name = tag_name
        self.tag_value = value
        # the tag is applied as a parse action on this element
        self.add_parse_action(self._add_tag)

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.tag_name}={self.tag_value!r}"

3771 

3772 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepts a single string, a single ParserElement, a generator, or
        any iterable. Strings are wrapped with ``_literalStringClass``.
        A non-iterable object ends up as a one-element list.
        """
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any strings found in the sequence with the literal class
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and reset the cached
        default name."""
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if not recursive:
            return self

        # Copy every contained expression first, then change the copies,
        # so the expressions originally passed in are not modified.
        copies = [e.copy() for e in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)

        if not recursive:
            return self

        # same copy-then-modify approach as leave_whitespace
        copies = [e.copy() for e in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained
        expression. A ``Suppress`` that is already registered is skipped."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for e in self.exprs:
            # pass on the entry the base class just appended
            e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline contained expressions and flatten a nested expression
        of the same class when this expression has exactly two members."""
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable(candidate):
            # same class, and carrying nothing that flattening would lose
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if mergeable(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if mergeable(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates each
        contained expression, then runs the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        prior = [] if validateTrace is None else validateTrace
        tmp = prior[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are themselves copies."""
        ret = typing.cast(ParseExpression, super().copy())
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the diagnostic is
        enabled and not suppressed) if a contained expression already has
        a results name."""
        diagnostic_on = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        )
        if diagnostic_on:
            for e in self.exprs:
                if (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and (
                        Diagnostics.warn_ungrouped_named_tokens_in_collection
                        not in e.suppress_warnings_
                    )
                ):
                    warning = (
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {e.resultsName!r} on contained expression"
                    )
                    warnings.warn(warning, stacklevel=3)
                    # one warning is enough
                    break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

3942 

3943 

class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: once parseImpl passes it, failures of later
        expressions are raised as ``ParseSyntaxException``."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence, replacing each ``...`` placeholder with a
        ``SkipTo`` of the expression that follows it.

        Raises ``Exception`` if the sequence ends with ``...``.
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            expanded: List[ParserElement] = []
            last_index = len(exprs) - 1
            for i, expr in enumerate(exprs):
                if expr is not Ellipsis:
                    expanded.append(expr)
                elif i < last_index:
                    # Adding to Empty() converts a plain string into a
                    # parser element; take the converted right-hand term.
                    skipto_arg: ParserElement = typing.cast(
                        ParseExpression, (Empty() + exprs[i + 1])
                    ).exprs[-1]
                    expanded.append(SkipTo(skipto_arg)("_skipped*"))
                else:
                    raise Exception("cannot construct And with sequence ending in ...")
            exprs = expanded
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take whitespace handling from the leading expression
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace
        else:
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Resolve pending skips, run the base streamlining, link any
        ``IndentedBlock`` to the expression before it, and recompute
        ``mayReturnEmpty``."""

        def ends_with_pending_skip(e):
            return (
                isinstance(e, ParseExpression)
                and e.exprs
                and isinstance(e.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if ends_with_pending_skip(e):
                    # fold the following expression into the pending skip
                    # and mark its old slot for removal
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while id(cur) not in seen:
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # bind cur through a default argument so each action
                    # keeps its own block
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                next_first = next(iter(cur.recurse()), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating tokens.

        After an ``_ErrorStop`` has been seen, a ``ParseBaseException`` or
        ``IndexError`` from a later expression is raised as
        ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            if type(e) is And._ErrorStop:
                errorStop = True
                continue

            if not errorStop:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            else:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support ``expr += other`` by appending to this And in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop at the first expression that cannot return empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return f"{{{inner}}}"

4104 

4105 

class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline, then recompute the flags derived from the
        alternatives."""
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative and return the result of the longest match.

        First pass: ``try_parse`` each alternative to record how far it
        gets. Second pass: re-parse the candidates from longest to
        shortest, since parse actions may change what matches.

        If nothing matches, raises the best recorded
        ``ParseFatalException`` when there is one, otherwise the
        ``ParseException`` that got furthest into the input.
        """
        maxExcLoc = -1
        maxException = None
        matches: List[Tuple[int, ParserElement]] = []
        fatals: List[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error discards any ordinary failure seen so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # best shorter-than-expected result so far; -1 means none yet
            best_loc, best_toks = -1, None
            for loc1, expr1 in matches:
                if loc1 <= best_loc:
                    # already have a longer match than this one will deliver, we are done
                    return best_loc, best_toks

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    if loc2 > best_loc:
                        best_loc, best_toks = loc2, toks

            if best_loc != -1:
                return best_loc, best_toks

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longer parser element string
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            if maxExcLoc == loc:
                maxException.msg = self.errmsg
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Support ``expr ^= other`` by appending to this Or in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the diagnostic is
        enabled and not suppressed) if any alternative is an ``And``."""
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4259 

4260 

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the
        alternatives."""
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` is re-raised immediately. If every
        alternative fails, raises the ``ParseException`` that got furthest
        into the input.
        """
        furthest_loc = -1
        furthest_exc = None

        for e in self.exprs:
            try:
                return e._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any individual error message
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Support ``expr |= other`` by appending to this MatchFirst in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # MatchFirst([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' | '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the diagnostic is
        enabled and not suppressed) if any alternative is an ``And``."""
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
            and any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            )
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4366 

4367 

4368class Each(ParseExpression): 

4369 """Requires all given :class:`ParseExpression` s to be found, but in 

4370 any order. Expressions may be separated by whitespace. 

4371 

4372 May be constructed using the ``'&'`` operator. 

4373 

4374 Example:: 

4375 

4376 color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") 

4377 shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") 

4378 integer = Word(nums) 

4379 shape_attr = "shape:" + shape_type("shape") 

4380 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") 

4381 color_attr = "color:" + color("color") 

4382 size_attr = "size:" + integer("size") 

4383 

4384 # use Each (using operator '&') to accept attributes in any order 

4385 # (shape and posn are required, color and size are optional) 

4386 shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr) 

4387 

4388 shape_spec.run_tests(''' 

4389 shape: SQUARE color: BLACK posn: 100, 120 

4390 shape: CIRCLE size: 50 color: BLUE posn: 50,80 

4391 color:GREEN size:20 shape:TRIANGLE posn:20,40 

4392 ''' 

4393 ) 

4394 

4395 prints:: 

4396 

4397 shape: SQUARE color: BLACK posn: 100, 120 

4398 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] 

4399 - color: BLACK 

4400 - posn: ['100', ',', '120'] 

4401 - x: 100 

4402 - y: 120 

4403 - shape: SQUARE 

4404 

4405 

4406 shape: CIRCLE size: 50 color: BLUE posn: 50,80 

4407 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] 

4408 - color: BLUE 

4409 - posn: ['50', ',', '80'] 

4410 - x: 50 

4411 - y: 80 

4412 - shape: CIRCLE 

4413 - size: 50 

4414 

4415 

4416 color: GREEN size: 20 shape: TRIANGLE posn: 20,40 

4417 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] 

4418 - color: GREEN 

4419 - posn: ['20', ',', '40'] 

4420 - x: 20 

4421 - y: 40 

4422 - shape: TRIANGLE 

4423 - size: 20 

4424 """ 

4425 

def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
    """Collect the expressions and set the initial matching flags."""
    super().__init__(exprs, savelist)
    # an Each with no expressions can trivially match nothing
    self.mayReturnEmpty = (
        all(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
    )
    self.skipWhitespace = True
    # parseImpl builds its expression groups on first use while this is set
    self.initExprGroups = True
    self.saveAsList = True

4435 

def __iand__(self, other):
    """Support ``expr &= other`` by appending to this Each in place.

    A string is first wrapped with ``_literalStringClass``; anything that
    is not a ``ParserElement`` yields ``NotImplemented``.
    """
    candidate = (
        self._literalStringClass(other) if isinstance(other, str_type) else other
    )
    if isinstance(candidate, ParserElement):
        return self.append(candidate)  # Each([self, other])
    return NotImplemented

4442 

def streamline(self) -> ParserElement:
    """Streamline, then recompute ``mayReturnEmpty`` from the contained
    expressions (``True`` when there are none)."""
    super().streamline()
    self.mayReturnEmpty = (
        all(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
    )
    return self

4450 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    # Match all sub-expressions in whatever order they occur in the input.
    #
    # Works in two phases: first, repeated trial passes (try_parse) discover
    # an order in which the sub-expressions match; second, the expressions are
    # parsed for real, in that order, and their results are accumulated.
    #
    # Raises the "deepest" ParseFatalException seen in the final trial pass,
    # or a ParseException if any required expression never matched.
    if self.initExprGroups:
        # one-time classification of the sub-expressions (cached on self)

        # maps id() of an Opt's inner expression back to the Opt wrapper
        self.opt1map = dict(
            (id(e.expr), e) for e in self.exprs if isinstance(e, Opt)
        )
        # inner expressions of explicit Opt wrappers
        opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
        # other expressions that may match empty (excluding Opt, Regex, ZeroOrMore)
        opt2 = [
            e
            for e in self.exprs
            if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
        ]
        self.optionals = opt1 + opt2
        # inner expressions of every _MultipleMatch, renamed with
        # list_all_matches=True so repeated matches accumulate under the name
        self.multioptionals = [
            e.expr.set_results_name(e.resultsName, list_all_matches=True)
            for e in self.exprs
            if isinstance(e, _MultipleMatch)
        ]
        # same treatment for OneOrMore, which must match at least once
        self.multirequired = [
            e.expr.set_results_name(e.resultsName, list_all_matches=True)
            for e in self.exprs
            if isinstance(e, OneOrMore)
        ]
        self.required = [
            e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
        ]
        self.required += self.multirequired
        self.initExprGroups = False

    # work on copies so the cached groups survive for the next call
    tmpLoc = loc
    tmpReqd = self.required[:]
    tmpOpt = self.optionals[:]
    multis = self.multioptionals[:]
    matchOrder: List[ParserElement] = []

    keepMatching = True
    failed: List[ParserElement] = []
    fatals: List[ParseFatalException] = []
    while keepMatching:
        # each pass tries every still-pending expression at the current location
        tmpExprs = tmpReqd + tmpOpt + multis
        failed.clear()
        fatals.clear()
        for e in tmpExprs:
            try:
                tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True)
            except ParseFatalException as pfe:
                # remember fatal errors, tagged with the expression that raised them
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                failed.append(e)
            except ParseException:
                failed.append(e)
            else:
                # record the Opt wrapper (if any) rather than its inner expression
                matchOrder.append(self.opt1map.get(id(e), e))
                # required/optional expressions are consumed once matched;
                # entries in multis are never removed, so they may match again
                if e in tmpReqd:
                    tmpReqd.remove(e)
                elif e in tmpOpt:
                    tmpOpt.remove(e)
        # stop once a full pass makes no progress
        if len(failed) == len(tmpExprs):
            keepMatching = False

    # look for any ParseFatalExceptions
    if fatals:
        if len(fatals) > 1:
            # prefer the fatal error furthest into the input ...
            fatals.sort(key=lambda e: -e.loc)
            if fatals[0].loc == fatals[1].loc:
                # ... breaking ties by the longest expression string
                fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
        max_fatal = fatals[0]
        raise max_fatal

    if tmpReqd:
        missing = ", ".join([str(e) for e in tmpReqd])
        raise ParseException(
            instring,
            loc,
            f"Missing one or more required elements ({missing})",
        )

    # add any unmatched Opts, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt]

    # second phase: parse for real, in the discovered order, from the original loc
    total_results = ParseResults([])
    for e in matchOrder:
        loc, results = e._parse(instring, loc, do_actions)
        total_results += results

    return loc, total_results

4537 

def _generateDefaultName(self) -> str:
    """Return the default name: sub-expression names joined by `` & `` in braces."""
    joined = " & ".join(map(str, self.exprs))
    return "{" + joined + "}"

4540 

4541 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement` that wraps a single
    expression, for combining and post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        super().__init__(savelist)
        if isinstance(expr, str_type):
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the wrapped expression's parsing characteristics
        self.mayIndexError = expr.mayIndexError
        self.mayReturnEmpty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> List[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the wrapped expression.

        ``ParseSyntaxException`` propagates untouched.  Any other
        ``ParseBaseException`` is re-raised after its message is replaced
        with this element's ``errmsg`` (when one is set), unless this
        element is a ``Forward`` without a custom name.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as err:
            unnamed_forward = isinstance(self, Forward) and self.customName is None
            if self.errmsg and not unnamed_forward:
                err.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Disable whitespace skipping; when ``recursive``, also on a copy of
        the wrapped expression."""
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Enable whitespace skipping; when ``recursive``, also on a copy of
        the wrapped expression."""
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the wrapped expression.

        A ``Suppress`` that is already in ``ignoreExprs`` is not added again.
        """
        duplicate_suppress = isinstance(other, Suppress) and other in self.ignoreExprs
        if not duplicate_suppress:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and, if present, the wrapped expression."""
        super().streamline()
        wrapped = self.expr
        if wrapped is not None:
            wrapped.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element is already in
        ``parseElementList``; otherwise continue the check on the wrapped
        expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        if self.expr is not None:
            self.expr._checkRecursion(parseElementList[:] + [self])

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, validates the wrapped
        expression, then runs the recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        extended_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``ClassName:(expr)`` as the default name."""
        cls_name = type(self).__name__
        return f"{cls_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4647 

4648 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Zero-width match that succeeds only at exactly column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda text, pos, toks: col(pos, text) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width match that succeeds only at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda text, pos, toks: col(pos, text) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; nested blocks overwrite this
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Skip to the first non-whitespace position using an Empty(); its
        # column becomes the reference indent for every line of this block.
        anchor_loc = Empty().preParse(instring, loc)

        # Fail fast: if self.expr does not match here this raises, and there
        # is nothing further to do.
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        line_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        full_expr = body + Opt(trailing_undent)
        return full_expr.parseImpl(instring, anchor_loc, do_actions)

4711 

4712 

class AtStringStart(ParseElementEnhance):
    """Matches the wrapped expression only at location 0 of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

4732 

4733 

class AtLineStart(ParseElementEnhance):
    r"""Matches the wrapped expression only when the parse location is at
    column 1 of a line within the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        at_line_start = col(loc, instring) == 1
        if at_line_start:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

4765 

4766 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.

    ``FollowedBy`` verifies that the expression matches at the current
    position but does *not* advance the parsing position.  The returned
    token list is always empty; results names defined inside the lookahead
    expression *are* kept and remain accessible by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Parse with the wrapped expression, then empty the token list of the
        # returned ParseResults in place; this keeps any named results that
        # were defined in the FollowedBy expression.
        lookahead = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead[:]

        # the original location is returned, so no input is consumed
        return loc, lookahead

4801 

4802 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.

    ``PrecededBy`` checks that the expression matches immediately before
    the current position, without advancing the parsing position.  The
    returned token list is always empty, but a results name defined on the
    given expression is still returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    The retreat parameter is not required when the lookbehind expression is
    a string, :class:`Literal`, :class:`Keyword`, or a :class:`Word` or
    :class:`CharsNotIn` with a specified exact or maximum length.
    Otherwise, retreat must be given, as the maximum number of characters
    to look back from the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False

        # work out whether the lookbehind distance is known from the expression
        known_width = True
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            known_width = False

        self.exact = known_width
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the matched tokens in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        if self.exact:
            # known distance: parse once, starting self.retreat chars back
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            _, found = self.expr._parse(instring, loc - self.retreat)
            return loc, found

        # retreat specified a maximum lookbehind window, iterate
        anchored = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_error = ParseException(instring, loc, self.errmsg)

        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, found = anchored._parse(window, len(window) - offset)
            except ParseBaseException as err:
                last_error = err
            else:
                return loc, found

        # no starting offset produced a match ending at loc
        raise last_error

4884 

4885 

class Located(ParseElementEnhance):
    """
    Wraps the tokens of the given expression together with the start and
    end locations of the match in the input string.

    The following results names are added:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end, tokens = self.expr._parse(instring, loc, do_actions, callPreParse=False)
        located = ParseResults([loc, tokens, end])
        named_items = (
            ("locn_start", loc),
            ("value", tokens),
            ("locn_end", end),
        )
        for key, item in named_items:
            located[key] = item
        # with a results name, return as a list so that the name is attached
        # to the complete group
        return end, ([located] if self.resultsName else located)

4926 

4927 

class NotAny(ParseElementEnhance):
    """
    Negative lookahead: succeeds only if the given parse expression does
    *not* match at the current position.

    ``NotAny`` does *not* advance the parsing position within the input
    string and does *not* skip over leading whitespace.  It always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # Set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expressions.
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        unwanted_matches = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if not unwanted_matches:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"

4969 

4970 

class _MultipleMatch(ParseElementEnhance):
    """Shared base for repetition expressions: matches the wrapped
    expression one or more times, optionally stopping at a sentinel."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        self.saveAsList = True
        # the stopOn() method converts string sentinels itself
        self.stopOn(stopOn or stop_on)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that ends repetition."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_ender = None
        if self.not_ender is not None:
            reject_ender = self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_one(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                start = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_one(instring, start, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition simply ends the match
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        warning_kind = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and warning_kind not in self.suppress_warnings_
        ):
            # warn once, for the first contained expression with its own name
            for inner in [self.expr] + self.expr.recurse():
                if not isinstance(inner, ParserElement):
                    continue
                if not inner.resultsName:
                    continue
                if warning_kind in inner.suppress_warnings_:
                    continue
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {inner.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                break

        return super()._setResultsName(name, list_all_matches)

5045 

5046 

class OneOrMore(_MultipleMatch):
    """
    Matches the given expression one or more times.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        return "{" + str(self.expr) + "}..."

5077 

5078 

class ZeroOrMore(_MultipleMatch):
    """
    Matches the given expression zero or more times.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        try:
            matched = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # zero repetitions: succeed with empty results at the same location
            return loc, ParseResults([], name=self.resultsName)
        return matched

    def _generateDefaultName(self) -> str:
        return "[" + str(self.expr) + "]..."

5111 

5112 

class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','.

        By default the list elements and delimiters may be separated by
        whitespace and comments, and the result is a list of the matched
        tokens with the delimiters suppressed.  Passing ``combine=True``
        instead returns the match as a single token string with the
        delimiters included.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Raises ``ValueError`` if ``min`` is less than 1, or if ``max`` is
        less than ``min``.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are dropped from the results unless combining
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # first item, then (delim + item) repeated within the remaining bounds
        fewest_more = self.min - 1
        most_more = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            fewest_more,
            most_more,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        item = self.content.streamline()
        return f"{item} [{self.raw_delim} {item}]..."

5176 

5177 

class _NullToken:
    """Placeholder object that is falsy and renders as an empty string."""

    def __bool__(self):
        # always falsy
        return False

    def __str__(self):
        # always the empty string
        return str()

5184 

5185 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match, but is not required to
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default value was supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        inner = self.expr
        try:
            new_loc, tokens = inner._parse(
                instring, loc, do_actions, callPreParse=False
            )
        except (ParseException, IndexError):
            # no match: stay at the same location and fall back to the default
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                return loc, []
            if not inner.resultsName:
                return loc, [fallback]
            # attach the default to the inner expression's results name
            tokens = ParseResults([fallback])
            tokens[inner.resultsName] = fallback
            return loc, tokens
        return new_loc, tokens

    def _generateDefaultName(self) -> str:
        label = str(self.expr)
        # strip off redundant inner {}'s
        while len(label) > 1 and label.startswith("{") and label.endswith("}"):
            label = label[1:-1]
        return "[" + label + "]"


Optional = Opt

5261 

5262 

5263class SkipTo(ParseElementEnhance): 

5264 """ 

5265 Token for skipping over all undefined text until the matched 

5266 expression is found. 

5267 

5268 Parameters: 

5269 

5270 - ``expr`` - target expression marking the end of the data to be skipped 

5271 - ``include`` - if ``True``, the target expression is also parsed 

5272 (the skipped text and target expression are returned as a 2-element 

5273 list) (default= ``False``). 

5274 - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and 

5275 comments) that might contain false matches to the target expression 

5276 - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be 

5277 included in the skipped test; if found before the target expression is found, 

5278 the :class:`SkipTo` is not a match 

5279 

5280 Example:: 

5281 

5282 report = ''' 

5283 Outstanding Issues Report - 1 Jan 2000 

5284 

5285 # | Severity | Description | Days Open 

5286 -----+----------+-------------------------------------------+----------- 

5287 101 | Critical | Intermittent system crash | 6 

5288 94 | Cosmetic | Spelling error on Login ('log|n') | 14 

5289 79 | Minor | System slow when running too many reports | 47 

5290 ''' 

5291 integer = Word(nums) 

5292 SEP = Suppress('|') 

5293 # use SkipTo to simply match everything up until the next SEP 

5294 # - ignore quoted strings, so that a '|' character inside a quoted string does not match 

5295 # - parse action will call token.strip() for each matched token, i.e., the description body 

5296 string_data = SkipTo(SEP, ignore=quoted_string) 

5297 string_data.set_parse_action(token_map(str.strip)) 

5298 ticket_expr = (integer("issue_num") + SEP 

5299 + string_data("sev") + SEP 

5300 + string_data("desc") + SEP 

5301 + integer("days_open")) 

5302 

5303 for tkt in ticket_expr.search_string(report): 

5304 print tkt.dump() 

5305 

5306 prints:: 

5307 

5308 ['101', 'Critical', 'Intermittent system crash', '6'] 

5309 - days_open: '6' 

5310 - desc: 'Intermittent system crash' 

5311 - issue_num: '101' 

5312 - sev: 'Critical' 

5313 ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] 

5314 - days_open: '14' 

5315 - desc: "Spelling error on Login ('log|n')" 

5316 - issue_num: '94' 

5317 - sev: 'Cosmetic' 

5318 ['79', 'Minor', 'System slow when running too many reports', '47'] 

5319 - days_open: '47' 

5320 - desc: 'System slow when running too many reports' 

5321 - issue_num: '79' 

5322 - sev: 'Minor' 

5323 """ 

5324 

5325 def __init__( 

5326 self, 

5327 other: Union[ParserElement, str], 

5328 include: bool = False, 

5329 ignore: typing.Optional[Union[ParserElement, str]] = None, 

5330 fail_on: typing.Optional[Union[ParserElement, str]] = None, 

5331 *, 

5332 failOn: typing.Optional[Union[ParserElement, str]] = None, 

5333 ): 

5334 super().__init__(other) 

5335 failOn = failOn or fail_on 

5336 self.ignoreExpr = ignore 

5337 self.mayReturnEmpty = True 

5338 self.mayIndexError = False 

5339 self.includeMatch = include 

5340 self.saveAsList = False 

5341 if isinstance(failOn, str_type): 

5342 self.failOn = self._literalStringClass(failOn) 

5343 else: 

5344 self.failOn = failOn 

5345 self.errmsg = f"No match found for {self.expr}" 

5346 self.ignorer = Empty().leave_whitespace() 

5347 self._update_ignorer() 

5348 

5349 def _update_ignorer(self): 

5350 # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr 

5351 self.ignorer.ignoreExprs.clear() 

5352 for e in self.expr.ignoreExprs: 

5353 self.ignorer.ignore(e) 

5354 if self.ignoreExpr: 

5355 self.ignorer.ignore(self.ignoreExpr) 

5356 

5357 def ignore(self, expr): 

5358 super().ignore(expr) 

5359 self._update_ignorer() 

5360 

    def parseImpl(self, instring, loc, do_actions=True):
        """
        Scan forward from ``loc`` until the target expression matches.

        Returns ``(new_loc, results)`` where ``results`` holds the text between
        the starting location and the position where scanning stopped, plus
        the target's own tokens when ``includeMatch`` is set.

        Raises ``ParseException`` when the scan position passes the end of
        ``instring`` without the loop having been left by a ``break``.
        """
        startloc = loc
        instrlen = len(instring)
        # bind the callables used inside the scan loop to local names once
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break skips the while/else below, so no
                # exception is raised here; the text skipped so far is returned
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # trial match only: parse actions are not run at this point
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real, honoring the caller's do_actions flag
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5413 

5414 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # remember where this Forward was created, for the warning in __del__
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # source line of the most recent '<<', compared against in __or__
        self.lshift_line = None

    def __lshift__(self, other) -> "Forward":
        """Attach ``other`` as the contained expression and copy its parsing
        attributes onto this element. Strings are wrapped using
        ``_literalStringClass``; any other non-ParserElement operand yields
        ``NotImplemented``."""
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> "Forward":
        """In-place form of ``<<``; returns ``NotImplemented`` for operands
        that are not ParserElements."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> "ParserElement":
        """Build the alternation as the base class does, warning first when
        the ``'|'`` is on the same source line as the ``'<<'`` that assigned
        this Forward (see the class docstring on operator precedence)."""
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if getattr(self, "expr", False) is not None:
            # either an expression was attached, or __init__ failed before
            # 'expr' existed (the False default) - nothing to warn about
            return
        caller_frame = getattr(self, "caller_frame", None)
        if caller_frame is None:
            # '<<' deletes caller_frame before validating its operand, so the
            # record can be gone while expr is still None; without this guard
            # the finalizer itself raised AttributeError
            return
        if (
            __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=caller_frame.filename,
                lineno=caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse using the contained expression, applying the bounded
        recursion algorithm described below when left recursion support is
        enabled on ``ParserElement``."""
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match – do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, prev_result.copy()
                        del memo[peek_key]
                        return prev_loc, prev_peek.copy()
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Turn off whitespace skipping on this element only; ``recursive``
        is accepted but not used here."""
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Turn on whitespace skipping on this element only; ``recursive``
        is accepted but not used here."""
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the contained expression once; the flag is set before
        descending so a self-referencing grammar does not recurse forever."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then runs the
        legacy recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<class name>: <contained expression>"``, with the
        expression text cut to 1000 characters, ``"None"`` when nothing is
        attached, and ``"..."`` when rendering the expression fails."""
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            retString = str(self.expr)[:1000] if self.expr is not None else "None"
        except Exception:
            # fall back to a placeholder; unlike the former ``return`` inside
            # ``finally``, this lets KeyboardInterrupt/SystemExit propagate
            retString = "..."
        return f"{type(self).__name__}: {retString}"

    def copy(self) -> ParserElement:
        """Copy this element. An unassigned Forward is copied as a new
        Forward that refers back to this one, so a later assignment to the
        original is seen through the copy."""
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name as the base class does, warning first when
        no expression has been attached yet."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5674 

5675 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but deliberately not forwarded to the base class
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

5684 

5685 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the camelCase keyword, when supplied, overrides the snake_case one
        separator = join_string if joinString is None else joinString
        if adjacent:
            # no whitespace skipping inside the combined expression ...
            self.leave_whitespace()
        self.adjacent = adjacent
        # ... but leading whitespace before the Combine itself is still skipped
        self.skipWhitespace = True
        self.joinString = separator
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register an ignorable expression. In adjacent mode the base
        ``ParserElement.ignore`` is called directly instead of the inherited
        override."""
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single joined string."""
        # start from an emptied copy of the incoming results
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

5741 

5742 

class Group(TokenConverter):
    """Converter that returns the matched tokens as a nested list - handy for
    keeping the tokens of :class:`ZeroOrMore` and :class:`OneOrMore`
    expressions together.

    When the optional ``aslist`` argument is True, the tokens are returned
    as a Python list rather than a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the tokens in one enclosing list, or convert them to a
        ``ParseResults.List`` when ``aslist`` was requested."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

5778 

5779 

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and also as
    a dictionary: each element can be looked up using the first token of
    that element as its key. Useful for tabular report scraping when the
    first column can serve as an item key.

    When the optional ``asdict`` argument is True, the tokens are returned
    as a Python dict rather than a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add one named entry to ``tokenlist`` per non-empty element, keyed
        by the element's first token, then return the results (as a plain
        dict when ``asdict`` was requested)."""
        for position, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their string form
                key = str(key).strip()

            if len(entry) == 1:
                # key only, no value
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        converted = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [converted] if self.resultsName else converted

5865 

5866 

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` starts out as a placeholder that is resolved into
        # a SkipTo once the following expression is known (see __add__/__sub__)
        pending = expr is ...
        super().__init__(_PendingSkip(NoMatch()) if pending else expr)

    def __add__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # placeholder case: skip (and suppress) everything up to ``other``
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # placeholder case: skip (and suppress) everything up to ``other``
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressing - return this element unchanged."""
        return self

5920 

5921 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each time the wrapped parse action is called, a line of the form
    ``">>entering <name>(line: <source line>, <parse location>, <matched tokens>)"``
    is written to ``sys.stderr``. When it finishes, a ``"<<leaving <name>"``
    line follows, carrying either the returned value or the exception that
    the parse action raised (the exception is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)
    emit = sys.stderr.write

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (instring, loc, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is the owner; qualify with its type name
            label = f"{type(paArgs[0]).__name__}.{label}"
        emit(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            emit(f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n")
            raise
        else:
            emit(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z

5967 

5968 

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# --- mini-grammar used by srange() to parse "[...]" character-set strings ---

# backslash-escaped punctuation: yields the character after the backslash
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# hex escape (\x##, \X##, \0x##, \0X##): yields the character with that code.
# The digits are taken as everything after the x/X marker; the former
# lstrip(r"\0x") stripped a *set* of characters, so it failed on an
# uppercase 'X' and on all-zero values such as \x00.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# octal escape (\0##): yields the character with that code
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one set member: any escape form, or a single char other than '\' and ']'
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped so srange() can tell it apart from single chars
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

5995 

5996 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. If the
    input cannot be processed, an empty string is returned. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)
    """

    def expand_member(member):
        # a grouped [low, high] pair is a range; anything else is one character
        if isinstance(member, ParseResults):
            low, high = ord(member[0]), ord(member[1])
            return "".join(chr(code) for code in range(low, high + 1))
        return member

    try:
        members = _reBracketExpr.parse_string(s).body
        return "".join(expand_member(member) for member in members)
    except Exception:
        # best-effort helper: any failure yields an empty character set
        return ""

6032 

6033 

def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying ``func`` to every
    element of a :class:`ParseResults` list. Any additional args are
    forwarded to ``func`` after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (or after its class
    when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # label for the generated action: the function's own name, else its class name
    fallback_name = getattr(func, "__class__").__name__
    action_name = getattr(func, "__name__", fallback_name)

    def pa(s, l, t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    pa.__name__ = action_name
    return pa

6078 

6079 

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every :class:`ParserElement` found among the caller's local variables
    that has no custom name yet is named after the variable holding it.
    """
    caller = sys._getframe().f_back
    if caller is not None:
        caller = typing.cast(types.FrameType, caller)
        for var_name, candidate in caller.f_locals.items():
            if not isinstance(candidate, ParserElement):
                continue
            if candidate.customName:
                # keep names that were assigned explicitly
                continue
            candidate.set_name(var_name)

6092 

6093 

# Quoted-string expressions. In each of the three single-line forms below the
# body regex accepts: any character other than the quote, newline, carriage
# return or backslash; a doubled quote character; or a backslash escape
# (backslash + any non-'x' character, or "\x" followed by hex digits).
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either quoting style, double-quoted alternative listed first
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# Python-style string literals: triple-quoted forms plus single-line forms.
# The single-line forms here accept a backslash-escaped quote rather than a
# doubled quote.
# NOTE(review): re.MULTILINE only changes how ^ and $ match, and neither
# appears in these patterns; re.DOTALL may have been intended so that "\\."
# could also match a backslash followed by a newline - confirm.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted string with a leading "u" prefix; built on a copy of quoted_string
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets expanded by srange() from hex code ranges:
# 0xC0-0xFF except 0xD7 and 0xF7 ...
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# ... and 0xA1-0xBF plus the two codes excluded above
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of the module namespace at this point: only ParserElement instances
# bound above this statement are captured)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6137 

# Compatibility synonyms
# camelCase names kept alongside the snake_case names defined in this module.
# The expression objects are plain aliases (same object under both names);
# the functions are wrapped with replaced_by_pep8().
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on