Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2667 statements  

1# 

2# core.py 

3# 

4from __future__ import annotations 

5 

6import collections.abc 

7from collections import deque 

8import os 

9import typing 

10from typing import ( 

11 Any, 

12 Callable, 

13 Generator, 

14 NamedTuple, 

15 Sequence, 

16 TextIO, 

17 Union, 

18 cast, 

19) 

20from abc import ABC, abstractmethod 

21from enum import Enum 

22import string 

23import copy 

24import warnings 

25import re 

26import sys 

27from collections.abc import Iterable 

28import traceback 

29import types 

30from operator import itemgetter 

31from functools import wraps 

32from threading import RLock 

33from pathlib import Path 

34 

35from .util import ( 

36 _FifoCache, 

37 _UnboundedCache, 

38 __config_flags, 

39 _collapse_string_to_ranges, 

40 _escape_regex_range_chars, 

41 _flatten, 

42 LRUMemo as _LRUMemo, 

43 UnboundedMemo as _UnboundedMemo, 

44 replaced_by_pep8, 

45) 

46from .exceptions import * 

47from .actions import * 

48from .results import ParseResults, _ParseResultsWithOffset 

49from .unicode import pyparsing_unicode 

50 

51_MAX_INT = sys.maxsize 

52str_type: tuple[type, ...] = (str, bytes) 

53 

54# 

55# Copyright (c) 2003-2022 Paul T. McGuire 

56# 

57# Permission is hereby granted, free of charge, to any person obtaining 

58# a copy of this software and associated documentation files (the 

59# "Software"), to deal in the Software without restriction, including 

60# without limitation the rights to use, copy, modify, merge, publish, 

61# distribute, sublicense, and/or sell copies of the Software, and to 

62# permit persons to whom the Software is furnished to do so, subject to 

63# the following conditions: 

64# 

65# The above copyright notice and this permission notice shall be 

66# included in all copies or substantial portions of the Software. 

67# 

68# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

69# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

70# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

71# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

72# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

73# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

74# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

75# 

76 

77from functools import cached_property 

78 

79 

class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # label for this family of flags
    # (presumably consumed by the __config_flags base class - TODO confirm)
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public (non-underscore) name defined in the class body up to this point;
    # must stay below the flag definitions because it introspects locals()
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # names listed here are presumably protected from being changed by the
    # base class - TODO confirm against __config_flags
    _fixed_names = """
        collect_all_And_tokens
        """.split()

101 

102 

class __diag__(__config_flags):
    # Holder for the global diagnostic flags; each flag mirrors a member of the
    # Diagnostics enum and defaults to False (disabled).
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # public names defined above; must stay below the flag definitions because
    # it introspects locals()
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # flags whose name starts with "warn" - the set switched on by enable_all_warnings()
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    # flags whose name starts with "enable_debug"
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        # enable every "warn*" flag; the "enable_debug*" flags are left untouched
        for name in cls._warning_names:
            cls.enable(name)

123 

124 

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - warning flag that was missing from this list;
      judging by its name it concerns combining the ``'<<'`` operator with a
      :class:`MatchFirst` (``'|'``) expression (TODO confirm exact trigger)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    # member names must match the attribute names on __diag__, because
    # enable_diag()/disable_diag() look the flag up by ``member.name``
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

157 

158 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch on the global pyparsing diagnostic flag identified by a
    :class:`Diagnostics` member.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

164 

165 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch off the global pyparsing diagnostic flag identified by a
    :class:`Diagnostics` member.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

171 

172 

def enable_all_warnings() -> None:
    """
    Switch on every global pyparsing diagnostic warning flag
    (see :class:`Diagnostics`); delegates to ``__diag__.enable_all_warnings``.
    """
    __diag__.enable_all_warnings()
    return None

178 

179 

180# hide abstract class 

181del __config_flags 

182 

183 

184def _should_enable_warnings( 

185 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] 

186) -> bool: 

187 enable = bool(warn_env_var) 

188 for warn_opt in cmd_line_warn_options: 

189 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( 

190 ":" 

191 )[:5] 

192 if not w_action.lower().startswith("i") and ( 

193 not (w_message or w_category or w_module) or w_module == "pyparsing" 

194 ): 

195 enable = True 

196 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): 

197 enable = False 

198 return enable 

199 

200 

# At import time, turn on all diagnostic warnings when either the interpreter's
# warning options (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS
# environment variable ask for it (see _should_enable_warnings for the rules).
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

205 

206 

# build list of single arg builtins, that can be used as parse actions
# (_trim_arity special-cases these and calls them with the tokens only)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# return shape of ParserElement.parseImpl: (new location, parsed tokens)
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition callable may accept 0 to 3 of the (s, loc, toks) arguments
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action signature: (instring, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug action signatures; the trailing bool is the cache_hit flag
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# commonly used character-set strings
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])

239 

240 

241class _ParseActionIndexError(Exception): 

242 """ 

243 Internal wrapper around IndexError so that IndexErrors raised inside 

244 parse actions aren't misinterpreted as IndexErrors raised inside 

245 ParserElement parseImpl methods. 

246 """ 

247 

248 def __init__(self, msg: str, exc: BaseException) -> None: 

249 self.msg: str = msg 

250 self.exc: BaseException = exc 

251 

252 

# Lazily-filled caches used by _trim_arity:
# - _trim_arity_call_line: stack entry captured inside _trim_arity on first use
# - pa_call_line_synth: (filename, line number) of the ``ret = func(...)`` call
#   inside ``wrapper``, computed from the entry above plus LINE_DIFF
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a callable that is invoked with the full parse-action argument
    list and forwards to ``func`` with leading arguments dropped. The number
    of arguments to drop is discovered on the first call(s): the call is
    retried with one more leading argument removed each time it fails with a
    TypeError raised by the call itself, up to ``max_limit`` dropped
    arguments. Once a call succeeds the discovered count is reused.

    Members of ``_single_arg_builtins`` bypass this and are always called
    with the tokens only.

    An IndexError raised by ``func`` during arity discovery is re-raised
    wrapped in ``_ParseActionIndexError``.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # the TypeError counts as an arity mismatch only if the last of
                    # the (at most two) extracted frames is the ``ret = func(...)``
                    # line above, i.e. the error was not raised inside func's body
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            # drop one more leading argument and try again
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

319 

320 

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a boolean predicate so that it can be used wherever a parse action
    is expected. Useful in places where :class:`ParserElement.add_condition`
    is not available (for example when attaching a condition to an operator
    level in :class:`infix_notation`).

    The returned parse action calls ``fn`` and raises an exception when the
    result is falsy; otherwise it returns ``None`` and leaves the tokens
    unchanged.

    Optional keyword arguments:

    - ``message`` - text used for the raised exception; when ``None`` the
      text "failed user-defined condition" is used
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing
      immediately; otherwise raise :class:`ParseException`

    """
    if message is None:
        failure_text = "failed user-defined condition"
    else:
        failure_text = message

    failure_cls = ParseException
    if fatal:
        failure_cls = ParseFatalException

    # normalize the predicate so it can always be called as fn(s, l, t)
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return
        raise failure_cls(s, l, failure_text)

    return pa

347 

348 

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "about to try a match" debug hook.

    Prints three lines: the expression with its location (offset, line number
    and column), the source line containing ``loc``, and a caret
    right-aligned to the column. The first line is prefixed with "*" when
    ``cache_hit`` is true.
    """
    prefix = "*" if cache_hit else ""
    header = f"{prefix}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})"
    source_line = f" {line(loc, instring)}"
    caret_line = f" {'^':>{col(loc, instring)}}"
    print("\n".join((header, source_line, caret_line)))

360 

361 

362def _default_success_debug_action( 

363 instring: str, 

364 startloc: int, 

365 endloc: int, 

366 expr: ParserElement, 

367 toks: ParseResults, 

368 cache_hit: bool = False, 

369): 

370 cache_hit_str = "*" if cache_hit else "" 

371 print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}") 

372 

373 

374def _default_exception_debug_action( 

375 instring: str, 

376 loc: int, 

377 expr: ParserElement, 

378 exc: Exception, 

379 cache_hit: bool = False, 

380): 

381 cache_hit_str = "*" if cache_hit else "" 

382 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}") 

383 

384 

def null_debug_action(*args):
    """Debug action that accepts any arguments and does nothing; use it to silence debugging output during parsing."""
    return None

387 

388 

389class ParserElement(ABC): 

390 """Abstract base level parser element class.""" 

391 

392 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

393 verbose_stacktrace: bool = False 

394 _literalStringClass: type = None # type: ignore[assignment] 

395 

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of characters that are skipped as whitespace.

    The new default is stored on :class:`ParserElement` and is also pushed to
    every expression in ``_builtin_exprs`` that still follows the default
    whitespace setting.

    Example::

        # default whitespace chars are space, <TAB> and newline
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # refresh the expressions predefined in this module that track the default
    followers = (expr for expr in _builtin_exprs if expr.copyDefaultWhiteChars)
    for expr in followers:
        expr.whiteChars = set(chars)

416 

@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class that wraps bare string literals when they are combined
    with parser expressions.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class

438 

@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Generate one instance of this class per item of ``seq``, constructing
    each as ``cls(item, **class_kwargs)``.

    Example::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    """
    for item in seq:
        yield cls(item, **class_kwargs)

450 

class DebugActions(NamedTuple):
    # Optional hooks consulted by _parseNoCache when debugging is on;
    # a field left as None means "no hook for this event".
    debug_try: typing.Optional[DebugStartAction]  # called before a match is attempted
    debug_match: typing.Optional[DebugSuccessAction]  # called after a successful match
    debug_fail: typing.Optional[DebugExceptionAction]  # called when matching raises

455 

def __init__(self, savelist: bool = False) -> None:
    """
    Set up the state shared by all parser elements.

    ``savelist`` is stored as ``saveAsList`` and later passed on as the
    ``asList`` argument when results are built.
    """
    # callbacks
    self.parseAction: list[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    self.callDuringTry = False

    # naming
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    # results names are modal (report only last) or cumulative (list all)
    self.modalResults = True
    self.saveAsList = savelist

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs: list[ParserElement] = []
    # lets subclasses avoid redundant calls to preParse
    self.callPreparse = True

    # consulted when checking for left-recursion
    self._may_return_empty = False
    self.streamlined = False
    # subclasses that don't advance the parse index can clear this to
    # optimize exception handling
    self.mayIndexError = True
    self.errmsg: Union[str, None] = ""

    # debugging and diagnostics
    self.debug = False
    self.debugActions = self.DebugActions(None, None, None)
    self.suppress_warnings_: list[Diagnostics] = []
    self.show_in_diagram = True

484 

@property
def mayReturnEmpty(self):
    """Camel-case alias that reads and writes ``_may_return_empty``."""
    current = self._may_return_empty
    return current

@mayReturnEmpty.setter
def mayReturnEmpty(self, value):
    # keep the alias and the underlying attribute in sync
    self._may_return_empty = value

492 

def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
    """
    Record that the given diagnostic should not be reported for this
    expression. Returns ``self`` so calls can be chained.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parse_string("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self

508 

def visit_all(self):
    """Yield this expression and every sub-expression reachable from it,
    breadth-first, each exactly once. Typically just for internal use.
    """
    pending = deque((self,))
    visited = set()
    while pending:
        node = pending.popleft()

        # recursive grammars can reach the same node again; visit it only once
        if node not in visited:
            visited.add(node)
            pending.extend(node.recurse())
            yield node

525 

def copy(self) -> ParserElement:
    """
    Return a shallow copy of this :class:`ParserElement` that has its own
    lists of parse actions and ignore expressions, so that they can be
    changed without affecting the original. If the element follows the
    default whitespace setting, the copy picks up the current default.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    clone = copy.copy(self)
    # give the clone independent lists
    clone.ignoreExprs = list(self.ignoreExprs)
    clone.parseAction = list(self.parseAction)
    if self.copyDefaultWhiteChars:
        clone.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return clone

554 

def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> ParserElement:
    """
    Attach a name under which the matched tokens can be looked up in the
    returned parse results.

    Results names behave like dict keys by default: a later match replaces
    an earlier one. Pass ``list_all_matches`` = True to keep every value
    captured under the name instead.

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
    this lets a basic element, such as an integer, be defined once and
    referenced in several places under different names.

    The abbreviated syntax ``expr("name")`` is equivalent to
    ``expr.set_results_name("name")`` - see :class:`__call__`. For
    ``list_all_matches`` behavior with the short form, use ``expr("name*")``.

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # either spelling of the flag may be used
    keep_all = listAllMatches or list_all_matches
    return self._setResultsName(name, keep_all)

588 

589 def _setResultsName(self, name, list_all_matches=False) -> ParserElement: 

590 if name is None: 

591 return self 

592 newself = self.copy() 

593 if name.endswith("*"): 

594 name = name[:-1] 

595 list_all_matches = True 

596 newself.resultsName = name 

597 newself.modalResults = not list_all_matches 

598 return newself 

599 

def set_break(self, break_flag: bool = True) -> ParserElement:
    """
    Arrange for the Python debugger to be entered just before this element
    is parsed. Pass ``break_flag`` = ``True`` to install the breakpoint,
    ``False`` to remove a previously installed one. Returns ``self``.
    """
    if not break_flag:
        # restore the parse method saved when the breakpoint was installed
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod  # type: ignore [method-assign]
        return self

    wrapped_parse = self._parse

    def breaker(instring, loc, do_actions=True, callPreParse=True):
        # this call to breakpoint() is intentional, not a checkin error
        breakpoint()
        return wrapped_parse(instring, loc, do_actions, callPreParse)

    # remember the wrapped method so that set_break(False) can undo this
    breaker._originalParseMethod = wrapped_parse  # type: ignore [attr-defined]
    self._parse = breaker  # type: ignore [method-assign]
    return self

619 

def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Replace this expression's parse actions with the given callables, which
    run when the expression matches successfully.

    Parse actions can convert data, perform extra validation, update
    external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable taking 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens arrive as a ParseResults and may be modified in place, using
    list-style append, extend, and pop operations for the parsed elements,
    and dictionary-style item set and del operations for named results.
    Tokens modified in place do not need to be returned.

    A parse action may instead return a replacement: another ``ParseResults``
    or an entirely different object (common for data conversions). A
    convenient way to build a new result is to define the values in a dict
    and pass it to :class:`ParseResults.from_dict`.

    Passing None as the only ``fn`` clears all parse actions previously
    added to this expression.

    Raises ``TypeError`` if any of the given actions is not callable.

    Optional keyword arguments:

    - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during
      lookaheads and alternate testing. Parse actions with side effects should
      only run once it is known that they are part of a successful parse.
      Parse actions that perform additional validation should pass
      call_during_try as True, so that the validation also takes part in the
      preliminary "try" parses.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None means "remove all parse actions"
    if fns == (None,):
        self.parseAction.clear()
        return self

    if any(not callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")

    # replace the list contents in place so the list object is kept
    self.parseAction[:] = list(map(_trim_arity, fns))

    # the snake_case keyword wins over the legacy camelCase spelling
    legacy_flag = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_flag)

    return self

707 

def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:
    """
    Append one or more parse actions to those already attached to this
    expression. See :class:`set_parse_action` for the accepted call
    signatures and keyword arguments.

    Once ``callDuringTry`` is truthy for this expression it stays that way;
    otherwise it is taken from the ``call_during_try`` (or legacy
    ``callDuringTry``) keyword.

    See examples in :class:`copy`.
    """
    # wrap every action first, then add them all at once
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction += wrapped

    legacy_flag = kwargs.get("callDuringTry", False)
    requested = kwargs.get("call_during_try", legacy_flag)
    self.callDuringTry = self.callDuringTry or requested
    return self

719 

def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement:
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception; when omitted
      (or ``None``) the default message of :class:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Returns ``self`` so calls can be chained.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    # Only stringify a message that was actually supplied: str(None) would
    # produce the literal text "None" and hide the default failure message.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self

756 

def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
    """
    Register the action to run when parsing fails at this expression.
    The fail action ``fn`` is a callable invoked as
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Returns ``self`` so calls can be chained."""
    setattr(self, "failAction", fn)
    return self

772 

def _skipIgnorables(self, instring: str, loc: int) -> int:
    """
    Advance ``loc`` past any text matched by this element's ignore
    expressions and return the new location.

    Every ignore expression is applied repeatedly until it stops matching;
    the whole set is then retried until a full pass matches nothing or
    fails to move the location.
    """
    if not self.ignoreExprs:
        return loc

    skippers = [expr._parse for expr in self.ignoreExprs]
    previous_loc = loc
    while True:
        matched_any = False
        for skip in skippers:
            try:
                while True:
                    loc, _ = skip(instring, loc)
                    matched_any = True
            except ParseException:
                pass
        # stop when nothing matched, or when matches were zero-width and the
        # parse location did not actually advance
        if not matched_any or loc == previous_loc:
            break
        previous_loc = loc
    return loc

793 

def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should start: ``loc`` moved past
    ignorable expressions (if any are configured) and then past leading
    whitespace characters (if whitespace skipping is enabled).
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    end = len(instring)
    skippable = self.whiteChars
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc

805 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Base implementation: consume nothing and produce no tokens."""
    no_tokens = []
    return loc, no_tokens

808 

def postParse(self, instring, loc, tokenlist):
    """Base implementation: hand back the token list unchanged."""
    unchanged = tokenlist
    return unchanged

811 

812 # @profile 

def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Run one uncached parse attempt of this element at ``loc``.

    Steps: optionally pre-parse (skip ignorables/whitespace), call
    ``parseImpl``, run ``postParse``, wrap the tokens in a ``ParseResults``,
    then run the parse actions when ``do_actions`` is true or
    ``callDuringTry`` is set.

    The parse step is written twice: a plain path, and a path wrapped in
    exception handling that is used only when debugging is on or a fail
    action is set, so the common case avoids that extra handling.

    Returns ``(new_loc, results)``. An IndexError from ``parseImpl`` is
    converted to a ``ParseException`` positioned at the end of the input;
    an IndexError from a parse action is re-raised as a ``ParseException``
    chained to the original error.
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            # print("Exception raised:", err)
            # NOTE(review): tokens_start is not yet bound if preParse itself
            # raised above - confirm whether that can happen in practice
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # fast path: same parse step as above, without debug/fail handling
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns a new object replaces the results;
                    # returning None or the same object keeps them as they are
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            # same parse-action loop as above, without the debug_fail reporting
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

913 

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and report only where the match would end.

    :param raise_fatal: when true, a :class:`ParseFatalException` propagates
        unchanged; otherwise it is replaced by a plain
        :class:`ParseException` located at ``loc``.
    :param do_actions: forwarded to the underlying ``_parse`` call.
    :returns: the location just past the matched text.
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    else:
        return outcome[0]

928 

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    A :class:`ParseException` or ``IndexError`` raised by the trial parse
    means "no match" and yields ``False``; any other exception propagates.
    """
    matched = True
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        matched = False
    return matched

936 

# Memo table (and its guarding lock) for left-recursive Forward references.
# Starts out as a plain empty dict; enable_left_recursion() installs a
# dedicated memo object in its place.
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = dict()

942 

class _CacheType(typing.Protocol):
    """
    Structural interface for the objects used to cache results and
    exceptions for packrat and left-recursion parsing.
    """

    # sentinel that ``get`` hands back for a key that has no cached entry
    not_in_cache: bool

    def get(self, *args) -> typing.Any:
        """Look up a cached entry."""

    def set(self, *args) -> None:
        """Store an entry."""

    def clear(self) -> None:
        """Drop all entries."""

956 

class NullCache(dict):
    """
    Do-nothing cache used as the initial value of the ``packrat_cache``
    class variable.  Once ``enable_packrat()`` is called it is replaced by a
    real ``_CacheType`` instance.

    Every operation is a no-op that returns ``None``.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        """Ignore the lookup; nothing is ever cached."""

    def set(self, *args) -> None:
        """Ignore the store request."""

    def clear(self) -> None:
        """Nothing to clear."""

971 

# Class-wide cache of parse attempts, so that backtracking through recursive
# expressions does not have to re-parse with the same arguments.
packrat_cache: _CacheType = NullCache()
# held by _parseCache for the whole lookup/parse/store sequence
packrat_cache_lock = RLock()
# two counters, indexed by _parseCache as [hits, misses]
packrat_cache_stats = [0, 0]

977 

# Backtracking calls this repeatedly with identical arguments, so the outcome
# of each distinct call is remembered instead of re-parsing the expression.
def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Caching front end to ``_parseNoCache``.

    Outcomes are stored in ``ParserElement.packrat_cache`` under the key
    ``(self, instring, loc, callPreParse, do_actions)``.  A successful parse
    is stored as ``(end_loc, results_copy, loc)``; a failed one is stored as
    a fresh exception of the same class and args (no traceback).  On a hit
    the stored exception is raised again, or a copy of the stored results is
    returned.  Hit/miss counters live in ``packrat_cache_stats``.

    The whole operation runs while holding ``packrat_cache_lock``.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is cache.not_in_cache:
            # miss: do the real work and remember how it turned out
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                fresh = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(key, pe.__class__(*pe.args))
                raise
            cache.set(key, (fresh[0], fresh[1].copy(), loc))
            return fresh

        ParserElement.packrat_cache_stats[HIT] += 1
        # Debug callbacks are called with a ``cache_hit`` keyword; a callback
        # that does not accept it raises TypeError, which is ignored.
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(cached, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, cached, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise cached

        cached = cast(tuple[int, ParseResults, int], cached)
        end_loc = cached[0]
        results = cached[1].copy()
        start_loc = cached[2]
        if self.debug and self.debugActions.debug_match:
            try:
                self.debugActions.debug_match(
                    instring, end_loc, start_loc, self, results, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return end_loc, results

1027 

# Active parse entry point.  It is the uncached implementation by default;
# enable_packrat() rebinds it to _parseCache and disable_memoization()
# restores it.
_parse = _parseNoCache

1029 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero every packrat hit/miss counter in place,
    and empty the left-recursion memo table.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so the existing list object is kept
    stats[:] = [0 for _ in stats]
    ParserElement.recursion_memos.clear()

1037 

# Which memoization strategy, if any, is currently switched on.  Both start
# off; enable_packrat() and enable_left_recursion() each refuse to run while
# the other's flag is set, unless called with force=True.
_packratEnabled = False
_left_recursion_enabled = False

1040 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing, whichever is active, and
    discard their memoized data.

    Calling this when neither is enabled is harmless, so it can be used to
    clear any earlier settings before enabling Packrat or Left Recursion.
    """
    ParserElement.reset_cache()
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False
    # go back to the uncached parse entry point
    ParserElement._parse = ParserElement._parseNoCache

1054 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Switch on "bounded recursion" parsing, which supports direct and
    indirect left-recursion.  While parsing, left-recursive
    :class:`Forward` elements are matched repeatedly with a fixed recursion
    depth that is raised step by step until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-run during backtracking.  Programs whose parse
    actions depend on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - upper bound on the number
      of ``Forward`` elements memoized during matching; ``None`` means
      memoize all of them.  Must be positive when given.

    Bounded Recursion parsing is similar to, but not the same as, Packrat
    parsing, so the two cannot be active together.  Pass ``force=True`` to
    disable any earlier, conflicting setting first.

    :raises RuntimeError: if Packrat is enabled and ``force`` is false.
    :raises NotImplementedError: if ``cache_size_limit`` is zero or negative.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError(f"Memo size of {cache_size_limit}")

    ParserElement.recursion_memos = memo  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True

1102 

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Switch on "packrat" parsing, i.e. memoization of parse attempts.
    Repeated attempts at the same string location (common in complex
    grammars) then return a cached value at once instead of re-running the
    parsing/validating code.  Both successful results and parsing
    exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the size
      of the packrat cache; ``None`` makes the cache unbounded; ``0``
      effectively disables the cache.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is
    first imported.  To activate it, call
    :class:`ParserElement.enable_packrat`, ideally immediately after
    importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar to, but not the same as, Bounded Recursion
    parsing, so the two cannot be active together.  Pass ``force=True`` to
    disable any earlier, conflicting setting first.

    If packrat parsing is already enabled (and ``force`` is false), the
    call returns without changing the existing cache.

    :raises RuntimeError: if Bounded Recursion is enabled and ``force`` is
        false.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if not ParserElement._packratEnabled:
        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        # route all parsing through the caching entry point
        ParserElement._parse = ParserElement._parseCache

1151 

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse ``instring`` according to this parser definition.  This is
    intended as the primary interface for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    Setting ``parse_all`` to ``True`` requires the whole input to match; it
    is equivalent to ending the grammar with :class:`StringEnd`.

    So that column numbers are reported properly, parsing is done on a copy
    of the input in which tabs have been expanded to spaces (8 per tab, the
    ``str.expandtabs`` default).  If the input contains tabs and a parse
    action uses its ``loc`` argument to index into the string, keep both
    views consistent by doing one of:

    - call ``parse_with_tabs`` on the grammar before ``parse_string`` (see :class:`parse_with_tabs`),
    - give the parse action the full ``(s,loc,toks)`` signature and index into its ``s`` argument, or
    - expand the tabs in the input yourself before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    require_full_match = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if require_full_match:
            # skip trailing whitespace/ignorables, then insist on end of text
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)

    return tokens

1222 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace: bool = True,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches.  Each match yields the
    matching tokens, start location, and end location.  May be called with
    the optional ``max_matches`` argument, to clip scanning after 'n'
    matches are found.  If ``overlap`` is specified, then overlapping
    matches will be reported.

    Note that the start and end locations are reported relative to the
    string being parsed.  See :class:`parse_string` for more information on
    parsing strings with embedded tabs.

    :param instring: text to scan; tabs are expanded first unless
        ``self.keepTabs`` is set.
    :param max_matches: stop after this many matches.
    :param overlap: if true, resume scanning inside the previous match
        instead of at its end.
    :param always_skip_whitespace: if true, skipping between attempts is
        done by a fresh ``Empty()`` that is given this expression's
        ``ignoreExprs`` and ``whiteChars``; otherwise by this expression's
        own ``preParse``.
    :param debug: if true, print a dict with the tokens, start and end of
        every match.
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of
        the two values is used.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    # use the PEP8 name, as parse_string does, rather than the legacy alias
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # where skipping alone would land from the current position
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc; step forward to avoid looping
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1320 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites the matched text using
    the tokens returned for each match (typically modified by a parse
    action).  Define a grammar, attach a parse action that changes the
    returned token list, then call ``transform_string()`` on the target
    string: every match is replaced by its tokens and the unmatched text in
    between is kept as is.  The transformed string is returned.

    As a side effect ``self.keepTabs`` is set to ``True``.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    copied_upto = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # carry over the untouched text between the previous match and this one
            if start > copied_upto:
                pieces.append(instring[copied_upto:start])
            copied_upto = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces.extend(toks.as_list())
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[copied_upto:])
        non_empty = list(filter(None, pieces))
        return "".join(map(str, _flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1371 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`: collect just the tokens of
    every match of this expression into a single :class:`ParseResults`.
    The optional ``max_matches`` argument clips the search after 'n'
    matches (``maxMatches`` is the pre-PEP8 spelling; the smaller value
    wins).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = self.scan_string(
            instring, limit, always_skip_whitespace=False, debug=debug
        )
        return ParseResults([tokens for tokens, _start, _end in found])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1416 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits ``instring`` wherever this expression matches.
    The optional ``maxsplit`` argument limits the number of splits.  When
    ``include_separators`` (default= ``False``; ``includeSeparators`` is the
    pre-PEP8 spelling) is true, the first token of each separator match is
    yielded between the pieces.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    piece_start = 0
    for sep_tokens, sep_start, sep_end in self.scan_string(
        instring, max_matches=maxsplit
    ):
        yield instring[piece_start:sep_start]
        if emit_separators:
            yield sep_tokens[0]
        piece_start = sep_end
    # whatever follows the last separator (possibly the empty string)
    yield instring[piece_start:]

1448 

def __add__(self, other) -> ParserElement:
    """
    Implementation of the ``+`` operator - returns :class:`And`.  A string
    operand is first wrapped using ``self._literalStringClass``.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1484 

def __radd__(self, other) -> ParserElement:
    """
    Implementation of the ``+`` operator when the left operand is not a
    :class:`ParserElement`.  ``... + expr`` becomes a :class:`SkipTo` up to
    ``expr`` (named ``"_skipped*"``) followed by ``expr``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1497 

def __sub__(self, other) -> ParserElement:
    """
    Implementation of the ``-`` operator: an :class:`And` of ``self`` and
    ``other`` with an error stop between them.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1507 

def __rsub__(self, other) -> ParserElement:
    """
    Implementation of the ``-`` operator when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1517 

def __mul__(self, other) -> ParserElement:
    """
    Implementation of the ``*`` operator, so that ``expr * 3`` can be
    written in place of ``expr + expr + expr``.  An expression may also be
    multiplied by a 2-integer tuple, similar to ``{min, max}`` multipliers
    in regular expressions.  Tuples may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` is accepted wherever ``None`` is.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    :raises ValueError: for a negative count, or a tuple whose second value
        is smaller than its first.
    """
    reps = other
    if reps is Ellipsis:
        reps = (0, None)
    elif isinstance(reps, tuple) and reps[:1] == (Ellipsis,):
        reps = ((0,) + reps[1:] + (None,))[:2]

    if isinstance(reps, int):
        required, optional = reps, 0
    elif isinstance(reps, tuple):
        # normalize to a (low, high) pair, with None standing for "open"
        reps = tuple(None if item is Ellipsis else item for item in reps)
        low, high = (reps + (None, None))[:2]
        if low is None:
            low = 0
        if not isinstance(low, int):
            return NotImplemented
        if high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        if not isinstance(high, int):
            return NotImplemented
        required, optional = low, high - low
    else:
        return NotImplemented

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    # mandatory leading part, if any
    if required == 1:
        head = self
    elif required:
        head = And([self] * required)
    else:
        head = None

    if not optional:
        return head

    # nested optionals: Opt(self + Opt(self + ... Opt(self)))
    tail = Opt(self)
    for _ in range(optional - 1):
        tail = Opt(self + tail)

    return tail if head is None else head + tail

1597 

def __rmul__(self, other) -> ParserElement:
    """Reflected ``*``: delegates to ``__mul__`` with the same operand."""
    product = self.__mul__(other)
    return product

1600 

def __or__(self, other) -> ParserElement:
    """
    Implementation of the ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a pending skip that must skip, and
    ``expr | ""`` is equivalent to ``Opt(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    alternative = other
    if isinstance(alternative, str_type):
        if alternative == "":
            return Opt(self)
        alternative = self._literalStringClass(alternative)

    if isinstance(alternative, ParserElement):
        return MatchFirst([self, alternative])
    return NotImplemented

1616 

def __ror__(self, other) -> ParserElement:
    """
    Implementation of the ``|`` operator when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1626 

def __xor__(self, other) -> ParserElement:
    """
    Implementation of the ``^`` operator - returns :class:`Or`.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1636 

def __rxor__(self, other) -> ParserElement:
    """
    Implementation of the ``^`` operator when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1646 

def __and__(self, other) -> ParserElement:
    """
    Implementation of the ``&`` operator - returns :class:`Each`.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1656 

def __rand__(self, other) -> ParserElement:
    """
    Implementation of the ``&`` operator when the left operand is not a
    :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1666 

def __invert__(self) -> ParserElement:
    """
    Implementation of the ``~`` operator - wraps this expression in
    :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1672 

# Explicitly mark the class as non-iterable.  Because __getitem__ is defined,
# Python would otherwise fall back to the legacy protocol of iterating by
# calling __getitem__ with successive indexes.
__iter__ = None

1676 

def __getitem__(self, key):
    """
    ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` occurrences of ``expr`` exist in the input stream.
    If this behavior is desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    :raises TypeError: if more than 2 index arguments are given.
    """
    has_stop = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[n : end_expr]  /  expr[... : end_expr]
        bound = key
        stop_on = bound.stop
        key = ... if bound.start is None else bound.start
        has_stop = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[m, n : end_expr]
        lower, bound = key[0], key[1]
        key = (lower, bound.start)
        stop_on = bound.stop
        has_stop = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    ret = self * tuple(key[:2])
    ret = typing.cast(_MultipleMatch, ret)

    if has_stop:
        ret.stopOn(stop_on)

    return ret

1736 

def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    Calling with no ``name`` is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()

    return self._setResultsName(name)

1756 

1757 def suppress(self) -> ParserElement: 

1758 """ 

1759 Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from 

1760 cluttering up returned output. 

1761 """ 

1762 return Suppress(self) 

1763 

1764 def ignore_whitespace(self, recursive: bool = True) -> ParserElement: 

1765 """ 

1766 Enables the skipping of whitespace before matching the characters in the 

1767 :class:`ParserElement`'s defined pattern. 

1768 

1769 :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any) 

1770 """ 

1771 self.skipWhitespace = True 

1772 return self 

1773 

1774 def leave_whitespace(self, recursive: bool = True) -> ParserElement: 

1775 """ 

1776 Disables the skipping of whitespace before matching the characters in the 

1777 :class:`ParserElement`'s defined pattern. This is normally only used internally by 

1778 the pyparsing module, but may be needed in some whitespace-sensitive grammars. 

1779 

1780 :param recursive: If true (the default), also disable whitespace skipping in child elements (if any) 

1781 """ 

1782 self.skipWhitespace = False 

1783 return self 

1784 

1785 def set_whitespace_chars( 

1786 self, chars: Union[set[str], str], copy_defaults: bool = False 

1787 ) -> ParserElement: 

1788 """ 

1789 Overrides the default whitespace chars 

1790 """ 

1791 self.skipWhitespace = True 

1792 self.whiteChars = set(chars) 

1793 self.copyDefaultWhiteChars = copy_defaults 

1794 return self 

1795 

1796 def parse_with_tabs(self) -> ParserElement: 

1797 """ 

1798 Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string. 

1799 Must be called before ``parse_string`` when the input grammar contains elements that 

1800 match ``<TAB>`` characters. 

1801 """ 

1802 self.keepTabs = True 

1803 return self 

1804 

1805 def ignore(self, other: ParserElement) -> ParserElement: 

1806 """ 

1807 Define expression to be ignored (e.g., comments) while doing pattern 

1808 matching; may be called repeatedly, to define multiple comment or other 

1809 ignorable patterns. 

1810 

1811 Example:: 

1812 

1813 patt = Word(alphas)[...] 

1814 patt.parse_string('ablaj /* comment */ lskjd') 

1815 # -> ['ablaj'] 

1816 

1817 patt.ignore(c_style_comment) 

1818 patt.parse_string('ablaj /* comment */ lskjd') 

1819 # -> ['ablaj', 'lskjd'] 

1820 """ 

1821 if isinstance(other, str_type): 

1822 other = Suppress(other) 

1823 

1824 if isinstance(other, Suppress): 

1825 if other not in self.ignoreExprs: 

1826 self.ignoreExprs.append(other) 

1827 else: 

1828 self.ignoreExprs.append(Suppress(other.copy())) 

1829 return self 

1830 

1831 def set_debug_actions( 

1832 self, 

1833 start_action: DebugStartAction, 

1834 success_action: DebugSuccessAction, 

1835 exception_action: DebugExceptionAction, 

1836 ) -> ParserElement: 

1837 """ 

1838 Customize display of debugging messages while doing pattern matching: 

1839 

1840 - ``start_action`` - method to be called when an expression is about to be parsed; 

1841 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)`` 

1842 

1843 - ``success_action`` - method to be called when an expression has successfully parsed; 

1844 should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)`` 

1845 

1846 - ``exception_action`` - method to be called when expression fails to parse; 

1847 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)`` 

1848 """ 

1849 self.debugActions = self.DebugActions( 

1850 start_action or _default_start_debug_action, # type: ignore[truthy-function] 

1851 success_action or _default_success_debug_action, # type: ignore[truthy-function] 

1852 exception_action or _default_exception_debug_action, # type: ignore[truthy-function] 

1853 ) 

1854 self.debug = True 

1855 return self 

1856 

1857 def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement: 

1858 """ 

1859 Enable display of debugging messages while doing pattern matching. 

1860 Set ``flag`` to ``True`` to enable, ``False`` to disable. 

1861 Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions. 

1862 

1863 Example:: 

1864 

1865 wd = Word(alphas).set_name("alphaword") 

1866 integer = Word(nums).set_name("numword") 

1867 term = wd | integer 

1868 

1869 # turn on debugging for wd 

1870 wd.set_debug() 

1871 

1872 term[1, ...].parse_string("abc 123 xyz 890") 

1873 

1874 prints:: 

1875 

1876 Match alphaword at loc 0(1,1) 

1877 Matched alphaword -> ['abc'] 

1878 Match alphaword at loc 3(1,4) 

1879 Exception raised:Expected alphaword (at char 4), (line:1, col:5) 

1880 Match alphaword at loc 7(1,8) 

1881 Matched alphaword -> ['xyz'] 

1882 Match alphaword at loc 11(1,12) 

1883 Exception raised:Expected alphaword (at char 12), (line:1, col:13) 

1884 Match alphaword at loc 15(1,16) 

1885 Exception raised:Expected alphaword (at char 15), (line:1, col:16) 

1886 

1887 The output shown is that produced by the default debug actions - custom debug actions can be 

1888 specified using :class:`set_debug_actions`. Prior to attempting 

1889 to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"`` 

1890 is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"`` 

1891 message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression, 

1892 which makes debugging and exception messages easier to understand - for instance, the default 

1893 name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``. 

1894 """ 

1895 if recurse: 

1896 for expr in self.visit_all(): 

1897 expr.set_debug(flag, recurse=False) 

1898 return self 

1899 

1900 if flag: 

1901 self.set_debug_actions( 

1902 _default_start_debug_action, 

1903 _default_success_debug_action, 

1904 _default_exception_debug_action, 

1905 ) 

1906 else: 

1907 self.debug = False 

1908 return self 

1909 

1910 @property 

1911 def default_name(self) -> str: 

1912 if self._defaultName is None: 

1913 self._defaultName = self._generateDefaultName() 

1914 return self._defaultName 

1915 

1916 @abstractmethod 

1917 def _generateDefaultName(self) -> str: 

1918 """ 

1919 Child classes must define this method, which defines how the ``default_name`` is set. 

1920 """ 

1921 

1922 def set_name(self, name: typing.Optional[str]) -> ParserElement: 

1923 """ 

1924 Define name for this expression, makes debugging and exception messages clearer. If 

1925 `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also 

1926 enable debug for this expression. 

1927 

1928 If `name` is None, clears any custom name for this expression, and clears the 

1929 debug flag is it was enabled via `__diag__.enable_debug_on_named_expressions`. 

1930 

1931 Example:: 

1932 

1933 integer = Word(nums) 

1934 integer.parse_string("ABC") # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1) 

1935 

1936 integer.set_name("integer") 

1937 integer.parse_string("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) 

1938 """ 

1939 self.customName = name # type: ignore[assignment] 

1940 self.errmsg = f"Expected {str(self)}" 

1941 

1942 if __diag__.enable_debug_on_named_expressions: 

1943 self.set_debug(name is not None) 

1944 

1945 return self 

1946 

1947 @property 

1948 def name(self) -> str: 

1949 # This will use a user-defined name if available, but otherwise defaults back to the auto-generated name 

1950 return self.customName if self.customName is not None else self.default_name 

1951 

1952 @name.setter 

1953 def name(self, new_name) -> None: 

1954 self.set_name(new_name) 

1955 

1956 def __str__(self) -> str: 

1957 return self.name 

1958 

1959 def __repr__(self) -> str: 

1960 return str(self) 

1961 

1962 def streamline(self) -> ParserElement: 

1963 self.streamlined = True 

1964 self._defaultName = None 

1965 return self 

1966 

1967 def recurse(self) -> list[ParserElement]: 

1968 return [] 

1969 

1970 def _checkRecursion(self, parseElementList): 

1971 subRecCheckList = parseElementList[:] + [self] 

1972 for e in self.recurse(): 

1973 e._checkRecursion(subRecCheckList) 

1974 

1975 def validate(self, validateTrace=None) -> None: 

1976 """ 

1977 Check defined expressions for valid structure, check for infinite recursive definitions. 

1978 """ 

1979 warnings.warn( 

1980 "ParserElement.validate() is deprecated, and should not be used to check for left recursion", 

1981 DeprecationWarning, 

1982 stacklevel=2, 

1983 ) 

1984 self._checkRecursion([]) 

1985 

1986 def parse_file( 

1987 self, 

1988 file_or_filename: Union[str, Path, TextIO], 

1989 encoding: str = "utf-8", 

1990 parse_all: bool = False, 

1991 *, 

1992 parseAll: bool = False, 

1993 ) -> ParseResults: 

1994 """ 

1995 Execute the parse expression on the given file or filename. 

1996 If a filename is specified (instead of a file object), 

1997 the entire file is opened, read, and closed before parsing. 

1998 """ 

1999 parseAll = parseAll or parse_all 

2000 try: 

2001 file_or_filename = typing.cast(TextIO, file_or_filename) 

2002 file_contents = file_or_filename.read() 

2003 except AttributeError: 

2004 file_or_filename = typing.cast(str, file_or_filename) 

2005 with open(file_or_filename, "r", encoding=encoding) as f: 

2006 file_contents = f.read() 

2007 try: 

2008 return self.parse_string(file_contents, parseAll) 

2009 except ParseBaseException as exc: 

2010 if ParserElement.verbose_stacktrace: 

2011 raise 

2012 

2013 # catch and re-raise exception from here, clears out pyparsing internal stack trace 

2014 raise exc.with_traceback(None) 

2015 

2016 def __eq__(self, other): 

2017 if self is other: 

2018 return True 

2019 elif isinstance(other, str_type): 

2020 return self.matches(other, parse_all=True) 

2021 elif isinstance(other, ParserElement): 

2022 return vars(self) == vars(other) 

2023 return False 

2024 

2025 def __hash__(self): 

2026 return id(self) 

2027 

2028 def matches( 

2029 self, test_string: str, parse_all: bool = True, *, parseAll: bool = True 

2030 ) -> bool: 

2031 """ 

2032 Method for quick testing of a parser against a test string. Good for simple 

2033 inline microtests of sub expressions while building up larger parser. 

2034 

2035 Parameters: 

2036 

2037 - ``test_string`` - to test against this expression for a match 

2038 - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests 

2039 

2040 Example:: 

2041 

2042 expr = Word(nums) 

2043 assert expr.matches("100") 

2044 """ 

2045 parseAll = parseAll and parse_all 

2046 try: 

2047 self.parse_string(str(test_string), parse_all=parseAll) 

2048 return True 

2049 except ParseBaseException: 

2050 return False 

2051 

    def run_tests(
        self,
        tests: Union[str, list[str]],
        parse_all: bool = True,
        comment: typing.Optional[Union[ParserElement, str]] = "#",
        full_dump: bool = True,
        print_results: bool = True,
        failure_tests: bool = False,
        post_parse: typing.Optional[
            Callable[[str, ParseResults], typing.Optional[str]]
        ] = None,
        file: typing.Optional[TextIO] = None,
        with_line_numbers: bool = False,
        *,
        parseAll: bool = True,
        fullDump: bool = True,
        printResults: bool = True,
        failureTests: bool = False,
        postParse: typing.Optional[
            Callable[[str, ParseResults], typing.Optional[str]]
        ] = None,
    ) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
        """
        Execute the parse expression on a series of test strings, showing each
        test, the parsed results or where the parse failed. Quick and easy way to
        run a parse expression against a list of sample strings.

        Parameters:

        - ``tests`` - a list of separate test strings, or a multiline string of test strings
        - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
        - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
        - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
        - ``print_results`` - (default= ``True``) prints test output to stdout
        - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
        - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
          `fn(test_string, parse_results)` and returns a string to be added to the test output
        - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
          if None, will default to ``sys.stdout``
        - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

        The camelCase keyword-only arguments (``parseAll``, ``fullDump``, ``printResults``,
        ``failureTests``, ``postParse``) are synonyms for the snake_case arguments above.

        Returns: a (success, results) tuple, where success indicates that all tests succeeded
        (or failed if ``failure_tests`` is True), and the results contain a list of
        ``(test_string, result)`` pairs, where each result is the parsed results or the
        exception raised for that test

        Example::

            number_expr = pyparsing_common.number.copy()

            result = number_expr.run_tests('''
                # unsigned integer
                100
                # negative integer
                -100
                # float with scientific notation
                6.02e23
                # integer with scientific notation
                1e-12
                ''')
            print("Success" if result[0] else "Failed!")

            result = number_expr.run_tests('''
                # stray character
                100Z
                # missing leading digit before '.'
                -.100
                # too many '.'
                3.14.159
                ''', failure_tests=True)
            print("Success" if result[0] else "Failed!")

        prints::

            # unsigned integer
            100
            [100]

            # negative integer
            -100
            [-100]

            # float with scientific notation
            6.02e23
            [6.02e+23]

            # integer with scientific notation
            1e-12
            [1e-12]

            Success

            # stray character
            100Z
               ^
            FAIL: Expected end of text (at char 3), (line:1, col:4)

            # missing leading digit before '.'
            -.100
            ^
            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

            # too many '.'
            3.14.159
                ^
            FAIL: Expected end of text (at char 4), (line:1, col:5)

            Success

        Each test string must be on a single line. If you want to test a string that spans multiple
        lines, create a test like this::

            expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

        (Note that this is a raw string literal, you must include the leading ``'r'``.)
        """
        from .testing import pyparsing_test

        # merge each snake_case argument with its camelCase synonym: flags that
        # default to True are and-ed, flags that default to False/None are or-ed
        parseAll = parseAll and parse_all
        fullDump = fullDump and full_dump
        printResults = printResults and print_results
        failureTests = failureTests or failure_tests
        postParse = postParse or post_parse
        if isinstance(tests, str_type):
            # a single multiline string: split into individually stripped lines
            tests = typing.cast(str, tests)
            line_strip = type(tests).strip
            tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
        comment_specified = comment is not None
        if comment_specified:
            if isinstance(comment, str_type):
                # a plain-string comment marker is turned into a Literal expression
                comment = typing.cast(str, comment)
                comment = Literal(comment)
            comment = typing.cast(ParserElement, comment)
        if file is None:
            file = sys.stdout
        print_ = file.write

        result: Union[ParseResults, Exception]
        allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
        comments: list[str] = []
        success = True
        # expression that rewrites a literal backslash-n marker into a real newline,
        # skipping over quoted strings
        NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
        BOM = "\ufeff"
        nlstr = "\n"
        for t in tests:
            # comment lines (and blank lines following a comment) are collected
            # and emitted ahead of the next real test
            if comment_specified and comment.matches(t, False) or comments and not t:
                comments.append(
                    pyparsing_test.with_line_numbers(t) if with_line_numbers else t
                )
                continue
            if not t:
                continue
            out = [
                f"{nlstr}{nlstr.join(comments) if comments else ''}",
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
            ]
            comments.clear()
            try:
                # convert newline marks to actual newlines, and strip leading BOM if present
                t = NL.transform_string(t.lstrip(BOM))
                result = self.parse_string(t, parse_all=parseAll)
            except ParseBaseException as pe:
                # parse failure: counts as success only when failures are expected
                fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
                out.append(pe.explain())
                out.append(f"FAIL: {fatal}{pe}")
                if ParserElement.verbose_stacktrace:
                    out.extend(traceback.format_tb(pe.__traceback__))
                success = success and failureTests
                result = pe
            except Exception as exc:
                tag = "FAIL-EXCEPTION"

                # see if this exception was raised in a parse action
                tb = exc.__traceback__
                it = iter(traceback.walk_tb(tb))
                for f, line in it:
                    if (f.f_code.co_filename, line) == pa_call_line_synth:
                        # the frame after the matching one is reported as the parse action
                        next_f = next(it)[0]
                        tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                        break

                out.append(f"{tag}: {type(exc).__name__}: {exc}")
                if ParserElement.verbose_stacktrace:
                    out.extend(traceback.format_tb(exc.__traceback__))
                success = success and failureTests
                result = exc
            else:
                # parse succeeded: counts as success only when failures are not expected
                success = success and not failureTests
                if postParse is not None:
                    try:
                        pp_value = postParse(t, result)
                        if pp_value is not None:
                            if isinstance(pp_value, ParseResults):
                                out.append(pp_value.dump())
                            else:
                                out.append(str(pp_value))
                        else:
                            # NOTE(review): unlike the other dump() calls below, this one
                            # does not pass full=fullDump - confirm whether that is intended
                            out.append(result.dump())
                    except Exception as e:
                        # a failing post-parse callback is reported, not propagated
                        out.append(result.dump(full=fullDump))
                        out.append(
                            f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                        )
                else:
                    out.append(result.dump(full=fullDump))
            out.append("")

            if printResults:
                print_("\n".join(out))

            allResults.append((t, result))

        return success, allResults

2266 

2267 def create_diagram( 

2268 self, 

2269 output_html: Union[TextIO, Path, str], 

2270 vertical: int = 3, 

2271 show_results_names: bool = False, 

2272 show_groups: bool = False, 

2273 embed: bool = False, 

2274 show_hidden: bool = False, 

2275 **kwargs, 

2276 ) -> None: 

2277 """ 

2278 Create a railroad diagram for the parser. 

2279 

2280 Parameters: 

2281 

2282 - ``output_html`` (str or file-like object) - output target for generated 

2283 diagram HTML 

2284 - ``vertical`` (int) - threshold for formatting multiple alternatives vertically 

2285 instead of horizontally (default=3) 

2286 - ``show_results_names`` - bool flag whether diagram should show annotations for 

2287 defined results names 

2288 - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box 

2289 - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden 

2290 - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed 

2291 the resulting HTML in an enclosing HTML source 

2292 - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code; 

2293 can be used to insert custom CSS styling 

2294 - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the 

2295 generated code 

2296 

2297 Additional diagram-formatting keyword arguments can also be included; 

2298 see railroad.Diagram class. 

2299 """ 

2300 

2301 try: 

2302 from .diagram import to_railroad, railroad_to_html 

2303 except ImportError as ie: 

2304 raise Exception( 

2305 "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams" 

2306 ) from ie 

2307 

2308 self.streamline() 

2309 

2310 railroad = to_railroad( 

2311 self, 

2312 vertical=vertical, 

2313 show_results_names=show_results_names, 

2314 show_groups=show_groups, 

2315 show_hidden=show_hidden, 

2316 diagram_kwargs=kwargs, 

2317 ) 

2318 if not isinstance(output_html, (str, Path)): 

2319 # we were passed a file-like object, just write to it 

2320 output_html.write(railroad_to_html(railroad, embed=embed, **kwargs)) 

2321 return 

2322 

2323 with open(output_html, "w", encoding="utf-8") as diag_file: 

2324 diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs)) 

2325 

    # Compatibility synonyms
    # Each pre-PEP8 camelCase name below is bound to the result of
    # replaced_by_pep8(<old name>, <snake_case method>).
    # NOTE(review): presumably a wrapper that forwards to the snake_case
    # method - confirm against replaced_by_pep8 in .util
    # fmt: off
    # static methods
    inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
    setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
        "setDefaultWhitespaceChars", set_default_whitespace_chars
    ))
    disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
    enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
    enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
    resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

    # instance methods
    setResultsName = replaced_by_pep8("setResultsName", set_results_name)
    setBreak = replaced_by_pep8("setBreak", set_break)
    setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
    addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
    addCondition = replaced_by_pep8("addCondition", add_condition)
    setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
    tryParse = replaced_by_pep8("tryParse", try_parse)
    parseString = replaced_by_pep8("parseString", parse_string)
    scanString = replaced_by_pep8("scanString", scan_string)
    transformString = replaced_by_pep8("transformString", transform_string)
    searchString = replaced_by_pep8("searchString", search_string)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
    parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
    setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
    setDebug = replaced_by_pep8("setDebug", set_debug)
    setName = replaced_by_pep8("setName", set_name)
    parseFile = replaced_by_pep8("parseFile", parse_file)
    runTests = replaced_by_pep8("runTests", run_tests)
    canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
    # plain alias of the default_name property (no wrapper)
    defaultName = default_name
    # fmt: on

2360 

2361 

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        """Remember the expression that precedes the ``...`` and whether skipping is mandatory."""
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render as "<anchor> ..." by naming anchor + Empty() and swapping in the ellipsis
        combined = self.anchor + Empty()
        return str(combined).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        """
        Resolve the pending ``...`` now that the following expression is known:
        build a ``SkipTo(other)`` whose results are collected under ``_skipped``.
        """
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # the anchor matched: if nothing was actually skipped, drop the
            # first token and the "_skipped" results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the anchor did not match: if the last skipped text is empty,
            # record that the anchor is missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder must never be parsed directly
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2401 

2402 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name is simply the name of the concrete class
        concrete_class = type(self)
        return concrete_class.__name__

2413 

2414 

class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2428 

2429 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('abc').parse_string('abc')  # -> ['abc']
        Literal('abc').parse_string('abcdef')  # -> ['abc']
        Literal('abc').parse_string('ab')  # -> Exception: Expected "abc"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: select a subclass with optimized parseImpl.
        # Only done when Literal itself is being instantiated, never for subclasses.
        target_cls = cls
        if cls is Literal:
            text = matchString or match_string
            if not text:
                target_cls = Empty
            elif len(text) == 1:
                target_cls = _SingleCharLiteral

        return super().__new__(target_cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        """
        :param match_string: the exact text to match (``matchString`` is the
            camelCase synonym and takes precedence when non-empty)
        """
        super().__init__()
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        # self.name depends on self.match, so errmsg is built after it is set
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # cheap first-character test before the full startswith() comparison
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2481 

2482 

class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # both arguments are accepted but ignored; the match string is always empty
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeeds without moving the location and without producing tokens
        no_tokens = []
        return loc, no_tokens

2498 

2499 

class _SingleCharLiteral(Literal):
    # Literal variant with a parseImpl that compares a single character only
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2505 

2506 

# Register Literal as ParserElement's _literalStringClass, now that Literal is defined.
# NOTE(review): presumably the class used to wrap plain strings combined with
# expressions - confirm against the uses of _literalStringClass
ParserElement._literalStringClass = Literal

2508 

2509 

2510class Keyword(Token): 

2511 """ 

2512 Token to exactly match a specified string as a keyword, that is, 

2513 it must be immediately preceded and followed by whitespace or 

2514 non-keyword characters. Compare with :class:`Literal`: 

2515 

2516 - ``Literal("if")`` will match the leading ``'if'`` in 

2517 ``'ifAndOnlyIf'``. 

2518 - ``Keyword("if")`` will not; it will only match the leading 

2519 ``'if'`` in ``'if x=1'``, or ``'if(y==2)'`` 

2520 

2521 Accepts two optional constructor arguments in addition to the 

2522 keyword string: 

2523 

2524 - ``ident_chars`` is a string of characters that would be valid 

2525 identifier characters, defaulting to all alphanumerics + "_" and 

2526 "$" 

2527 - ``caseless`` allows case-insensitive matching, default is ``False``. 

2528 

2529 Example:: 

2530 

2531 Keyword("start").parse_string("start") # -> ['start'] 

2532 Keyword("start").parse_string("starting") # -> Exception 

2533 

2534 For case-insensitive matching, use :class:`CaselessKeyword`. 

2535 """ 

2536 

2537 DEFAULT_KEYWORD_CHARS = alphanums + "_$" 

2538 

2539 def __init__( 

2540 self, 

2541 match_string: str = "", 

2542 ident_chars: typing.Optional[str] = None, 

2543 caseless: bool = False, 

2544 *, 

2545 matchString: str = "", 

2546 identChars: typing.Optional[str] = None, 

2547 ) -> None: 

2548 super().__init__() 

2549 identChars = identChars or ident_chars 

2550 if identChars is None: 

2551 identChars = Keyword.DEFAULT_KEYWORD_CHARS 

2552 match_string = matchString or match_string 

2553 self.match = match_string 

2554 self.matchLen = len(match_string) 

2555 self.firstMatchChar = match_string[:1] 

2556 if not self.firstMatchChar: 

2557 raise ValueError("null string passed to Keyword; use Empty() instead") 

2558 self.errmsg = f"Expected {type(self).__name__} {self.name}" 

2559 self._may_return_empty = False 

2560 self.mayIndexError = False 

2561 self.caseless = caseless 

2562 if caseless: 

2563 self.caselessmatch = match_string.upper() 

2564 identChars = identChars.upper() 

2565 self.identChars = set(identChars) 

2566 

2567 def _generateDefaultName(self) -> str: 

2568 return repr(self.match) 

2569 

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """
    Match the keyword text at ``loc``, requiring that it is neither
    immediately preceded nor immediately followed by a keyword character.

    Returns ``(loc + self.matchLen, self.match)`` on success. Otherwise
    raises :class:`ParseException`; when the text matched but a boundary
    check failed, the message and error location describe which boundary
    was violated.
    """
    errmsg = self.errmsg or ""
    errloc = loc
    if self.caseless:
        # identChars and caselessmatch were upper-cased at construction, so
        # input characters are upper-cased before comparing
        if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
            if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[loc + self.matchLen].upper() not in self.identChars
                ):
                    return loc + self.matchLen, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = loc + self.matchLen
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

    elif (
        # single-character keywords are settled by the first-char comparison
        (instring[loc] == self.firstMatchChar and self.matchLen == 1)
        or instring.startswith(self.match, loc)
    ):
        if loc == 0 or instring[loc - 1] not in self.identChars:
            if (
                loc >= len(instring) - self.matchLen
                or instring[loc + self.matchLen] not in self.identChars
            ):
                return loc + self.matchLen, self.match

            # followed by keyword char
            errmsg += ", keyword was immediately followed by keyword character"
            errloc = loc + self.matchLen
        else:
            # preceded by keyword char
            errmsg += ", keyword was immediately preceded by keyword character"
            errloc = loc - 1
        # else no match just raise plain exception

    raise ParseException(instring, errloc, errmsg, self)

2613 

@staticmethod
def set_default_keyword_chars(chars) -> None:
    """
    Overrides the default characters used by :class:`Keyword` expressions.

    The given ``chars`` value is stored as ``Keyword.DEFAULT_KEYWORD_CHARS``,
    which the :class:`Keyword` constructor falls back to when no
    identifier characters are passed explicitly.
    """
    Keyword.DEFAULT_KEYWORD_CHARS = chars

# Compatibility synonyms
# pre-PEP8 camelCase spelling, built by passing the method above
# through replaced_by_pep8
setDefaultKeywordChars = staticmethod(
    replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
)

2625 

2626 

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        # the pre-PEP8 keyword takes precedence when it is non-empty
        literal_text = matchString or match_string
        # the parent stores the upper-cased text for comparisons
        super().__init__(literal_text.upper())
        # Preserve the defining literal.
        self.returnString = literal_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the upper-cased input slice against the stored match text."""
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2652 

2653 

class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        # pre-PEP8 keyword spellings win over the positional forms when truthy
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2677 

2678 

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        super().__init__()
        self.match_string = match_string
        # the PEP8-named argument wins whenever it was actually supplied
        self.maxMismatches = (
            maxMismatches if max_mismatches is None else max_mismatches
        )
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Compare the input against ``match_string`` position by position,
        succeeding if no more than ``maxMismatches`` positions differ."""
        begin = loc
        target = self.match_string
        stop = begin + len(target)

        # only attempt a comparison when enough input remains
        if stop <= len(instring):
            tolerance = self.maxMismatches
            ignore_case = self.caseless
            misses = []
            pos = 0

            for pos, (src, mat) in enumerate(zip(instring[begin:stop], target)):
                if ignore_case:
                    src, mat = src.lower(), mat.lower()

                if src == mat:
                    continue

                misses.append(pos)
                if len(misses) > tolerance:
                    # too many differences - fall through to the exception
                    break
            else:
                loc = begin + pos + 1
                results = ParseResults([instring[begin:loc]])
                results["original"] = target
                results["mismatches"] = misses
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2764 

2765 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # reconcile pre-PEP8 keyword spellings with the positional forms
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        leading = set(initChars)
        if excludeChars:
            banned = set(excludeChars)
            leading -= banned
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - banned)
        self.initChars = leading
        self.initCharsOrig = "".join(sorted(leading))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set: share the leading set and its string form
            self.bodyChars = leading
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds, including the local
            # min/max values consulted while building the regex below
            self.minLen = self.maxLen = min = max = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                head = re.escape(self.initCharsOrig)
            else:
                head = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                elif self.minLen != self.maxLen:
                    upper = "" if self.maxLen == _MAX_INT else self.maxLen
                    repeat = f"{{{self.minLen},{upper}}}"
                else:
                    repeat = f"{{{self.minLen}}}"
                self.reString = f"{head}{repeat}"
            else:
                if max == 1:
                    tail = ""
                    repeat = ""
                else:
                    tail = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    # the leading fragment consumes one character, so body
                    # repetition counts are one less than the word length
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    elif min != max:
                        repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                    else:
                        repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{head}{tail}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # keep the character-scanning parseImpl when the regex is unusable
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def _generateDefaultName(self) -> str:
        max_repr_len = 16

        def abbreviate(chars) -> str:
            # collapse to range notation, then shorten with an ellipsis if long
            collapsed = _collapse_string_to_ranges(chars, re_escape=False)
            if len(collapsed) <= max_repr_len:
                return collapsed
            return collapsed[: max_repr_len - 3] + "..."

        if self.initChars == self.bodyChars:
            base = f"W:({abbreviate(self.initChars)})"
        else:
            base = f"W:({abbreviate(self.initChars)}, {abbreviate(self.bodyChars)})"

        # add length specification
        shortest, longest = self.minLen, self.maxLen
        if shortest <= 1 and longest == _MAX_INT:
            return base
        if shortest == longest:
            # a single-character word drops the leading "W:" marker
            return base[2:] if shortest == 1 else base + f"{{{shortest}}}"
        if longest == _MAX_INT:
            return base + f"{{{shortest},...}}"
        return base + f"{{{shortest},{longest}}}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Character-scanning matcher, used when no regex was installed."""
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        total = len(instring)
        allowed: set[str] = self.bodyChars
        limit = min(begin + self.maxLen, total)
        loc = begin + 1
        while loc < limit and instring[loc] in allowed:
            loc += 1

        # is the character right after the match another body character?
        more_follows = loc < total and instring[loc] in allowed
        preceded = begin > 0 and instring[begin - 1] in allowed

        failed = (
            loc - begin < self.minLen
            or (self.maxSpecified and more_follows)
            or (self.asKeyword and (preceded or more_follows))
        )
        if failed:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed by ``__init__`` when compilation succeeds."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.group()

3015 

3016 

class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        # pre-PEP8 keyword spellings win over the positional forms when truthy
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, as_keyword=keyword_mode, exclude_chars=excluded
        )

3037 

3038 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ) -> None:
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already compiled RE object, or a
        callable taking no arguments that returns a string or a compiled
        RE object; any other type raises ``TypeError``.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled RE object (stdlib re or a compatible module)
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # as_match takes precedence over as_group_list when both are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    @staticmethod
    def _matches_empty(compiled) -> bool:
        """Return whether ``compiled`` can match a zero-length input."""
        # probe with an empty value of the pattern's own type, so that a
        # bytes pattern is tested against b"" rather than ""
        return compiled.match(compiled.pattern[:0]) is not None

    @cached_property
    def re(self) -> re.Pattern:
        """Compiled pattern, created on first access.

        Also makes sure the may-return-empty flag holds a real boolean,
        whether the pattern was compiled here or supplied precompiled.
        Raises ``ValueError`` if a string pattern fails to compile.
        """
        compiled = self._re
        if not compiled:
            if callable(self.pattern):
                # replace self.pattern with the string returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, use it as-is
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                compiled = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = compiled.pattern
            else:
                try:
                    compiled = re.compile(self.pattern, self.flags)
                except re.error as err:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from err
                # a freshly compiled pattern always recomputes the flag
                self._may_return_empty = None
            self._re = compiled

        if self._may_return_empty is None:
            # precompiled patterns reach this point with the flag still unset
            self._may_return_empty = self._matches_empty(compiled)
        return compiled

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        return self.re.match

    @property
    def mayReturnEmpty(self):
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match at ``loc``; return the matched text as :class:`ParseResults`
        with any named groups added as named results."""
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Variant of ``parseImpl`` that returns ``match.groups()``."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Variant of ``parseImpl`` that returns the match object itself."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or when
        ``repl`` is a callable and ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # tokens[0] is the match object produced by parseImplAsMatch
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3235 

3236 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None``  => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """

    # escaped whitespace sequences and the characters they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ) -> None:
        super().__init__()
        # reconcile pre-PEP8 keyword spellings; the True-by-default booleans
        # are combined with "and" so that either spelling can switch them off
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow proper prefixes of a multi-character end quote, as long
            # as they are not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # plain raw string, NOT an f-string: {3}, {2} and {4} must
                    # reach the regex engine as repetition counts
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error as err:
            raise ValueError(
                f"invalid pattern {self.pattern!r} passed to QuotedString"
            ) from err

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    @staticmethod
    def _convert_escaped_numerics(s: str) -> str:
        """Convert the text after the backslash of an octal, null, hex or
        Unicode escape into the character it denotes."""
        if s == "0":
            return "\0"
        if s.isdigit() and len(s) == 3:
            return chr(int(s, base=8))
        elif s.startswith(("u", "x")):
            return chr(int(s[1:], base=16))
        else:
            return s

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match a quoted string at ``loc``.

        Returns the new location and the matched text; when
        ``unquote_results`` is set, the delimiters are stripped and escape
        sequences are converted. Raises :class:`ParseException` on no match.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            instring[loc] == self.first_quote_char
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                convert_escaped_numerics = self._convert_escaped_numerics
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3471 

3472 

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given
    exclusion set. Whitespace is matched too unless it is listed among
    the excluded characters (see example).

    Construct with a string of the disallowed characters and, optionally,
    ``min``, ``max`` and/or ``exact`` lengths. ``min`` defaults to 1 and
    may not be smaller than 1; ``max`` and ``exact`` default to 0, which
    means "no maximum" and "no exact length" respectively.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        super().__init__()
        # leading whitespace is significant for this token, so never skip it
        self.skipWhitespace = False
        # the keyword-only notChars spelling is used only when not_chars is empty
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the length test is made on the range-collapsed form, but the text
        # shown is taken from the raw exclusion string
        collapsed = _collapse_string_to_ranges(self.notChars)
        return (
            f"!W:({self.notChars[: 16 - 3]}...)"
            if len(collapsed) > 16
            else f"!W:({self.notChars})"
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # never scan beyond the maximum length or the end of the input
        limit = min(begin + self.maxLen, len(instring))
        end = begin + 1
        while end < limit and instring[end] not in excluded:
            end += 1

        if end - begin < self.minLen:
            raise ParseException(instring, end, self.errmsg, self)

        return end, instring[begin:end]

3551 

3552 

class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.

    Characters in ``ws`` that have no entry in :attr:`whiteStrs` are shown
    in the default name as ``<U+XXXX>`` (their code point in hex).
    """

    # display labels used when building the default name of an instance
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws
        # skip only the known whitespace characters that are NOT being matched
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        self._may_return_empty = True
        # evaluating self.name builds the default name from matchWhite, so
        # this must come after matchWhite is assigned
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        # Fall back to a code-point label for characters missing from the
        # table; indexing the table directly raised KeyError from __init__
        # for inputs such as White("\v").
        labels = White.whiteStrs
        return "".join(
            labels.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        # never scan beyond the maximum length or the end of the input
        maxloc = min(start + self.maxLen, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3630 

3631 

class PositionToken(Token):
    """Shared base for tokens such as :class:`LineStart`, :class:`StringEnd`
    and :class:`WordStart`; flags the element as able to match empty and
    clears ``mayIndexError``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

3637 

3638 

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; handy when
    scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        target = self.col
        # already at the requested column: nothing to skip
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        # step over whitespace until the target column or a non-space char
        end = len(instring)
        while loc < end:
            if not instring[loc].isspace() or col(loc, instring) == target:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        current = col(loc, instring)
        advance = self.col - current
        if advance < 0:
            # we are already past the requested column
            raise ParseException(instring, loc, "Text not in expected column", self)
        stop = loc + advance
        return stop, instring[loc:stop]

3671 

3672 

class LineStart(PositionToken):
    r"""Matches when the current position is the first column of a line
    inside the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before newline is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" not in self.orig_whiteChars:
            return pos

        # newline was originally skippable: step over each newline, then
        # skip the non-newline whitespace that follows it
        while instring[pos : pos + 1] == "\n":
            pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3720 

3721 

class LineEnd(PositionToken):
    """Matches when the current position is at the end of a line inside
    the parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        # newline must not be skipped as whitespace, it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)
        if loc == end:
            # end of input counts as end of line; no token is produced
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3743 

3744 

class StringStart(PositionToken):
    """Matches when the current position is at the start of the parse
    string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # position 0 always matches; otherwise everything before loc must be
        # exactly what pre-parsing from 0 would skip (whitespace/ignorables)
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

3760 

3761 

class StringEnd(PositionToken):
    """
    Matches when the current position is at the end of the parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        # exactly at the end: report a location one past it;
        # already beyond the end: leave the location unchanged
        new_loc = loc + 1 if loc == end else loc
        return new_loc, []

3780 

3781 

class WordStart(PositionToken):
    r"""Matches when the current position is the start of a
    :class:`Word`, i.e. the current character is in ``word_chars``
    (default= ``printables``) and the preceding character is not.
    To emulate the ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. Position 0 of the string being parsed
    always matches.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        super().__init__()
        # the legacy keyword wins only when it was changed from its default
        chosen = wordChars if wordChars != printables else word_chars
        self.wordChars = set(chosen)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if loc == 0:
            return loc, []

        chars = self.wordChars
        # fail if we are inside a word, or not in front of a word character
        if instring[loc - 1] in chars or instring[loc] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3808 

3809 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed.

    At position 0 of a non-empty string there is no preceding character,
    so the expression does not match there.
    """

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # the legacy keyword wins only when it was changed from its default
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            word_chars = self.wordChars
            # loc == 0 is rejected explicitly: instring[loc - 1] would be
            # instring[-1], i.e. the LAST character of the input, which made
            # the result at position 0 depend on how the string ends.
            if (
                loc == 0
                or instring[loc] in word_chars
                or instring[loc - 1] not in word_chars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3837 

3838 

class Tag(Token):
    """
    A meta-element that consumes no input and adds a named result to
    the parsed tokens, which can be inspected later in a parse action or
    when processing the parsed results. The tag value is optional and
    defaults to `True`.

    Example::

        end_punc = "." | ("!" + Tag("enthusiastic"))
        greeting = "Hello," + Word(alphas) + end_punc

        result = greeting.parse_string("Hello, World.")
        print(result.dump())

        result = greeting.parse_string("Hello, World!")
        print(result.dump())

    prints::

        ['Hello,', 'World', '.']

        ['Hello,', 'World', '!']
        - enthusiastic: True
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.tag_name = tag_name
        self.tag_value = value
        self.mayIndexError = False
        self._may_return_empty = True
        self.leave_whitespace()
        # the tag is attached to the results by a parse action
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults) -> None:
        # store the configured value under the tag name in the results
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}:{self.tag_name}={self.tag_value!r}"

3880 

3881 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.

    Holds the contained expressions in ``self.exprs`` and forwards
    whitespace, ignore, streamline, validate and copy operations to them.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the list ``self.exprs``.

        Accepts a generator, a single string, a single ParserElement, any
        iterable (strings inside it are wrapped with the literal string
        class), or any other object (tried as an iterable, else stored as a
        one-element list).
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        # materialize generators first so they can be inspected more than once
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a bare string is a single literal, not an iterable of characters
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if any(isinstance(expr, str_type) for expr in exprs):
                exprs = (
                    self._literalStringClass(e) if isinstance(e, str_type) else e
                    for e in exprs
                )
            self.exprs = list(exprs)
        else:
            # last resort: try to iterate, otherwise treat as a single item
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions in place and return self."""
        self.exprs.append(other)
        # the cached default name no longer reflects the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the originals passed in are not modified
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so the originals passed in are not modified
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression here and on every
        contained expression; a ``Suppress`` already present in
        ``self.ignoreExprs`` is not registered again.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super().ignore(other)
                # propagate the entry that the base class just appended
                for e in self.exprs:
                    e.ignore(self.ignoreExprs[-1])
        else:
            super().ignore(other)
            # propagate the entry that the base class just appended
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this expression and its children once, flattening a
        nested two-element expression of the same class where possible.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            # left-nested case: splice the first child's expressions in front
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            # right-nested case: splice the last child's expressions at the end
            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated: emits a ``DeprecationWarning``, then validates each
        contained expression and runs the recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        # copy the incoming trace so the caller's list is not mutated
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are themselves copies."""
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the diagnostic is enabled
        and not suppressed) if a contained expression already has a results name.
        """
        # diagnostic disabled or suppressed on this element: no check needed
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, list_all_matches)

        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning is enough, stop at the first offender
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4053 

4054 

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element: once parseImpl passes one of these, failures of the
        # following expressions are raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence, wrapping strings as literals and replacing
        each ``...`` with a ``SkipTo`` of the expression that follows it.

        Raises ``Exception`` if the sequence ends with ``...``.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            tmp: list[ParserElement] = []
            # pair every element with its successor; the last element has no
            # pair and is left untouched by the slice assignment below
            for cur_expr, next_expr in zip(exprs, exprs[1:]):
                if cur_expr is Ellipsis:
                    tmp.append(SkipTo(next_expr)("_skipped*"))
                else:
                    tmp.append(cur_expr)

            exprs[:-1] = tmp

        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            # take whitespace handling from the first expression, unless that
            # expression is itself a White, in which case skip nothing
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self._may_return_empty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips into their following expression, run the base
        streamlining, then hook each ``IndentedBlock`` to the expression
        that precedes it.
        """
        # collapse any _PendingSkip's
        if self.exprs and any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in self.exprs[:-1]
        ):
            # sentinel written over slots whose expression was absorbed
            deleted_expr_marker = NoMatch()
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    # combine the pending skip with the next expression
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # when prev matches, store its column on the block as
                    # parent_anchor; cur_ binds the block at definition time
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse each contained expression in order and return the final
        location with the accumulated results.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # if isinstance(e, And._ErrorStop):
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                # past an error stop: convert failures to ParseSyntaxException
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Support ``expr += other`` by appending to this And in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the slice picks the first and last characters of inner)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"

4221 

4222 

class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            # may be empty if ANY alternative may be empty; skip whitespace
            # only if ALL alternatives do
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self._may_return_empty = True

    def streamline(self) -> ParserElement:
        """Run the base streamlining and recompute the aggregate flags from
        the contained expressions.
        """
        super().streamline()
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            # a White alternative disables whitespace skipping for the whole Or
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative, then re-parse the candidates from longest
        to shortest match and return the best result.

        Raises the fatal exception with the greatest location if any
        alternative raised one, otherwise the non-fatal exception with the
        greatest location.
        """
        maxExcLoc = -1
        maxException = None
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        # first pass: probe each alternative and record where it would end
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # a fatal error discards any non-fatal error recorded so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # non-fatal errors are only tracked while no fatal was seen
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # best (end location, tokens) seen so far in the second pass
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    # matched at least as far as the probe did: take it
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie for first place, re-sort
                # so the element with the longer str() comes first
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if maxExcLoc == parse_start_loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Support ``expr ^= other`` by appending to this Or in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the diagnostic is enabled
        and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4379 

4380 

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            # may be empty if ANY alternative may be empty; skip whitespace
            # only if ALL alternatives do
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self._may_return_empty = True

    def streamline(self) -> ParserElement:
        """Run the base streamlining once and recompute the aggregate flags
        from the contained expressions.
        """
        if self.streamlined:
            return self

        super().streamline()
        if self.exprs:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            # a White alternative disables whitespace skipping for the whole expression
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
            self._may_return_empty = True
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` is re-raised immediately; if every
        alternative fails, the exception with the greatest location is raised.
        """
        maxExcLoc = -1
        maxException = None

        for e in self.exprs:
            try:
                return e._parse(instring, loc, do_actions)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                raise
            except ParseException as err:
                # remember the failure that got furthest into the input
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any individual error message
            parse_start_loc = self.preParse(instring, loc)
            if maxExcLoc == parse_start_loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Support ``expr |= other`` by appending to this MatchFirst in place."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # MatchFirst([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' | '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first (when the diagnostic is enabled
        and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4489 

4490 

class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            # the whole Each may be empty only if every member may be empty
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self._may_return_empty = True
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built on first parse
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``self &= other`` by appending ``other``; strings are
        first wrapped with ``_literalStringClass``.  Returns
        ``NotImplemented`` when ``other`` is not a parser element.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Each([self, other])

    def streamline(self) -> ParserElement:
        """Streamline via the base class, then recompute the may-return-empty
        flag from the current member expressions.  Returns ``self``.
        """
        super().streamline()
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self._may_return_empty = True
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match all member expressions at ``loc``, in whatever order they occur.

        A first phase uses ``try_parse`` repeatedly to discover the order in
        which members match; a second phase re-parses the members in that
        order with ``_parse`` to collect the results.

        Raises:
            ParseFatalException: if any member raised one during the final
                discovery pass (the one located furthest into the input wins).
            ParseException: if required members remain unmatched.
        """
        if self.initExprGroups:
            # map each Opt's inner expression back to the Opt that wraps it
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        # work on copies so the cached groups survive for the next parse
        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatable = self.multioptionals[:]
        matchOrder: list[ParserElement] = []

        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while True:
            candidates = pending_required + pending_optional + repeatable
            failed.clear()
            fatals.clear()
            for candidate in candidates:
                try:
                    scan_loc = candidate.try_parse(
                        instring, scan_loc, raise_fatal=True
                    )
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = candidate
                    fatals.append(pfe)
                    failed.append(candidate)
                except ParseException:
                    failed.append(candidate)
                else:
                    matchOrder.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            # stop once a full pass makes no progress
            if len(failed) == len(candidates):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the element with the longer name
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            # pass self so the exception records which element failed, as the
            # other ParseException raises in this module do
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
                self,
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        """Return the default display name: members joined by ``' & '`` and
        wrapped in braces.
        """
        return f"{{{' & '.join(str(e) for e in self.exprs)}}}"

4665 

4666 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement` that wraps a single
    expression, for combining and post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            text = typing.cast(str, expr)
            literal_class = self._literalStringClass
            if issubclass(literal_class, Token):
                expr = literal_class(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_class):
                expr = Literal(text)
            else:
                expr = literal_class(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # mirror the wrapped expression's parsing attributes on this wrapper
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the wrapped expression.

        A ``ParseSyntaxException`` passes through untouched.  Any other
        ``ParseBaseException`` has its missing ``pstr``/``loc``/
        ``parser_element`` filled in, and its message replaced by this
        element's ``errmsg`` when a custom name is set (never for ``Forward``).
        """
        inner = self.expr
        if inner is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return inner._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``leave_whitespace`` to this element and, when ``recursive``,
        to a copy of the wrapped expression that replaces the original.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``ignore_whitespace`` to this element and, when ``recursive``,
        to a copy of the wrapped expression that replaces the original.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the wrapped expression.

        Nothing is done when ``other`` is a ``Suppress`` that is already in
        ``ignoreExprs``.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            # forward the entry the base class just appended
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then the wrapped expression."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element already appears
        in ``parseElementList``; otherwise continue the check in the wrapped
        expression with this element added to the trail.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        trail = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(trail)

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning``, validates the wrapped
        expression, then runs the recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = ([] if validateTrace is None else validateTrace[:]) + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``<ClassName>:(<wrapped expr>)`` as the default name."""
        class_name = type(self).__name__
        return f"{class_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4775 

4776 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Zero-width check that the current column equals ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda text, pos, _toks: col(pos, text) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width check that the current column exceeds ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda text, pos, _toks: col(pos, text) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column to which the text is expected to return after this block
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse one or more occurrences of the wrapped expression, all
        starting at the column of the first one; with ``recursive`` set,
        deeper-indented nested blocks are accepted as well.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        # Group the block's tokens only when requested
        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )

4839 

4840 

class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string(" 123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # NOTE(review): presumably turned off so leading whitespace is not
        # skipped before the position check below - confirm against _parse
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, but only at location 0.

        Raises:
            ParseException: if ``loc`` is not 0; the exception carries this
                element as its parser element.
        """
        if loc != 0:
            # pass self so the failure is attributed to this element, as the
            # other ParseException raises in this module do
            raise ParseException(instring, loc, "not found at string start", self)
        return super().parseImpl(instring, loc, do_actions)

4860 

4861 

class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # NOTE(review): presumably turned off so leading whitespace is not
        # skipped before the column check below - confirm against _parse
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression, but only when ``loc`` is in column 1.

        Raises:
            ParseException: if ``loc`` is not at column 1; the exception
                carries this element as its parser element.
        """
        if col(loc, instring) != 1:
            # pass self so the failure is attributed to this element, as the
            # other ParseException raises in this module do
            raise ParseException(instring, loc, "not found at line start", self)
        return super().parseImpl(instring, loc, do_actions)

4893 

4894 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # a lookahead never consumes input
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression at ``loc`` and return ``loc``
        unchanged together with the emptied results.
        """
        # by calling self.expr._parse and deleting the contents of the returned
        # ParseResults list we keep any named results that were defined in the
        # FollowedBy expression
        lookahead_results = self.expr._parse(instring, loc, do_actions=do_actions)[1]
        del lookahead_results[:]

        return loc, lookahead_results

4929 

4930 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        # work on a copy that does not skip leading whitespace
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False

        # for these expression kinds the lookbehind distance is taken from the
        # expression itself, overriding the ``retreat`` argument
        exact = True
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            exact = False
        self.exact = exact
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False

        def _discard_tokens(s, l, t):
            # empty the token list in place (any named results are untouched)
            del t[:]

        self.parseAction.append(_discard_tokens)

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Check that the wrapped expression matches immediately before
        ``loc``; return ``loc`` unchanged with the lookbehind's results.

        Raises:
            ParseException: (or the last failure seen) if no lookbehind
                match is found.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            lookbehind_start = loc - self.retreat
            return loc, self.expr._parse(instring, lookbehind_start)[1]

        # retreat specified a maximum lookbehind window, iterate
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_failure: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )

        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                last_failure = pbe
            else:
                break
        else:
            # no starting offset produced a match that ends exactly at loc
            raise last_failure

        return loc, ret

5010 

5011 

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression and return its tokens bracketed by
        the start and end locations, also exposed under results names.
        """
        begin = loc
        end, tokens = self.expr._parse(instring, begin, do_actions, callPreParse=False)
        located = ParseResults([begin, tokens, end])
        located["locn_start"] = begin
        located["value"] = tokens
        located["locn_end"] = end
        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]

5052 

5053 

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # set the flag directly instead of calling self.leave_whitespace(),
        # which would propagate the setting to the contained expressions
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens at ``loc`` unless the wrapped expression can
        parse there, in which case raise ``ParseException``.
        """
        unwanted_matches = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if unwanted_matches:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        """Return ``~{<wrapped expr>}`` as the default name."""
        return "~{" + str(self.expr) + "}"

5095 

5096 

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching of one expression, with
    an optional sentinel expression that stops the repetition.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        # ``stopOn`` takes precedence over ``stop_on`` when both are given
        sentinel = stopOn or stop_on
        self.saveAsList = True
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel expression; stores its
        negation in ``not_ender`` and returns ``self``.
        """
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the wrapped expression once, then as many more times as
        possible, stopping at the first failure or at the sentinel.

        A failure of the first match (or the sentinel being found at the
        start) propagates to the caller; later failures just end the loop.
        """
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        not_ender = self.not_ender
        reject_sentinel = not_ender.try_parse if not_ender is not None else None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_expr(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # a failed repetition simply ends the sequence
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Attach a results name, first warning (when the diagnostic is
        enabled and not suppressed) if the wrapped expression or one of its
        direct sub-expressions already carries a results name.
        """
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for contained in [self.expr] + self.expr.recurse():
                if not isinstance(contained, ParserElement):
                    continue
                if contained.resultsName and diag not in contained.suppress_warnings_:
                    warnings.warn(
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {contained.resultsName!r} on contained expression",
                        stacklevel=3,
                    )
                    # one warning is enough
                    break

        return super()._setResultsName(name, list_all_matches)

5172 

5173 

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        """Return ``{<wrapped expr>}...`` as the default name."""
        return "{" + str(self.expr) + "}..."

5204 

5205 

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        # zero repetitions is a valid match
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match as many repetitions as possible; when even the first one
        fails, succeed at ``loc`` with empty results.
        """
        try:
            matched = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            return loc, ParseResults([], name=self.resultsName)
        return matched

    def _generateDefaultName(self) -> str:
        """Return ``[<wrapped expr>]...`` as the default name."""
        return "[" + str(self.expr) + "]..."

5238 

5239 

class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        # validate the requested element-count bounds
        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        # delimiters are kept in the output only when combining
        self.delim = delim if combine else Suppress(delim)
        self.combine = combine
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one element, followed by (min-1 .. max-1) "delimiter element" pairs
        extra_min = self.min - 1
        extra_max = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            extra_min,
            extra_max,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Return ``<content> [<delim> <content>]...`` as the default name,
        using the streamlined content expression.
        """
        element = self.content.streamline()
        return "{0} [{1} {0}]...".format(element, self.raw_delim)

5303 

5304 

5305class _NullToken: 

5306 def __bool__(self): 

5307 return False 

5308 

5309 def __str__(self): 

5310 return "" 

5311 

5312 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match, but is not required to
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default supplied"
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        # an absent optional still counts as a successful, empty match
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the wrapped expression if it matches; otherwise succeed at
        ``loc`` with the default value (if one was given) or no tokens.
        """
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                # expose the default under the wrapped expression's results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Return ``[<wrapped expr>]`` as the default name, without any
        braces that enclose the whole inner name.
        """
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return f"[{inner}]"

5387 

5388 

5389Optional = Opt 

5390 

5391 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``other`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; scanning stops at the first location where
      this expression matches
    - ``failOn`` - keyword-only pre-PEP8 synonym for ``fail_on``; when both are
      given, a truthy ``failOn`` takes precedence

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper expression that only carries the ignore expressions to skip
        # over while scanning; it never strips whitespace itself
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target's ignore expressions
        plus the ``ignore`` argument given at construction."""
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register ``expr`` as an ignorable expression and refresh the
        internal ignorer.

        Returns ``self`` so that calls can be chained, matching the other
        ``ignore`` implementations in this module.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns ``(new_loc, ParseResults)`` where the results hold the skipped
        text (and the target's tokens when ``include`` was set). Raises
        :class:`ParseException` if the end of the input is passed without the
        target matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches; note that this leaves
                # the loop through the normal exit path below, not the
                # ``else`` clause that raises
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: parse actions are deliberately not run here
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real so that its tokens (and parse
            # actions, when enabled) are included in the result
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5542 

5543 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember where this Forward was created; __del__ uses the file name
        # and line number if no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # stack entry of the most recent '<<' assignment (see __lshift__ / __or__)
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and return ``self``.

        Strings are converted with ``_literalStringClass``; any other
        non-:class:`ParserElement` operand yields ``NotImplemented``.
        """
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # adopt the parsing attributes of the attached expression
        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # record the calling stack entry so __or__ can detect '<<' and '|'
        # being used from the same source line
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """In-place form of ``<<``; delegates to :meth:`__lshift__` for
        :class:`ParserElement` operands and returns ``NotImplemented`` otherwise."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """Build the ``|`` alternative as usual, first warning (when the
        diagnostic is enabled and not suppressed) if the caller's stack entry
        equals the one recorded by the last ``<<``."""
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # report the warning at the location where the Forward was created
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse using the attached expression.

        Optionally warns when no expression has been attached. When
        ``ParserElement._left_recursion_enabled`` is false this simply defers
        to the inherited implementation; otherwise the bounded-recursion
        algorithm described below is used.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                # none of the parse entry points found in the inspected stack
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                # memo keys are (location, this Forward, do_actions flag)
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                # seed the memo with a failure located before ``loc`` so that
                # any real match counts as an improvement
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            # remember the failure for both flavours before re-raising
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Disable whitespace skipping on this element only; ``recursive`` is
        accepted but not used here."""
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Enable whitespace skipping on this element only; ``recursive`` is
        accepted but not used here."""
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the attached expression once; the flag is set before
        descending so a self-referencing grammar does not recurse forever."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then runs the legacy
        recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                # cap the length of the embedded expression text
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """Copy this element; an empty ``Forward`` is copied as a new
        ``Forward`` that wraps the original."""
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # warn (when enabled and not suppressed) about naming a Forward that
        # has no contained expression yet, then defer to the base class
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5811 

5812 

class TokenConverter(ParseElementEnhance):
    """
    Abstract base for :class:`ParseElementEnhance` subclasses that convert
    the results produced by their contained expression.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but not forwarded to the base class;
        # converters always start out with ``saveAsList`` cleared
        super().__init__(expr)
        self.saveAsList = False

5821 

5822 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless ``adjacent=False`` is passed to the constructor, the matched
    pieces must also be contiguous in the input string.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # the pre-PEP8 keyword wins whenever it is given explicitly
        if joinString is None:
            joinString = join_string
        # turn off whitespace skipping inside the contained expressions when
        # they must be adjacent, then switch it back on for the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        # an adjacent Combine uses the plain ParserElement implementation
        # directly; otherwise the inherited implementation is used
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from an emptied copy of the incoming results, then add the
        # single joined string to it
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        return [combined] if self.resultsName and combined.haskeys() else combined

5878 

5879 

class Group(TokenConverter):
    """Converter that returns the matched tokens nested in a list - handy for
    keeping the tokens of :class:`ZeroOrMore` and :class:`OneOrMore`
    expressions together.

    When the optional ``aslist`` argument is True, the parsed tokens are
    returned as a Python list rather than a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # default case: wrap the results one level deeper
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

5915 

5916 

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also
    as a dictionary keyed by the first token of each element. Useful for
    tabular report scraping when the first column can serve as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict rather than a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for index, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            # the first token of each entry is the key; integer keys are
            # stored under their string form
            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if entry_len == 1:
                # key only, no value
                value = ""
            elif entry_len == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                # keep the remainder whole unless it is a single token
                # without any results names of its own
                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, index)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

6002 

6003 

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` stands for "skip to whatever gets added next";
        # keep a placeholder until that expression is known
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # resolve the pending skip now that the target expression is known
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # resolve the pending skip now that the target expression is known
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # drop every token that was matched
        return []

    def suppress(self) -> ParserElement:
        # already suppressed, nothing more to wrap
        return self

6057 

6058 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each call of the wrapped parse action writes
    ``">>entering name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr`` before the action runs, and afterwards writes
    ``"<<leaving name"`` followed by the returned value, or by the exception
    that the parse action raised (which is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is reported by its type name
            label = f"{type(paArgs[0]).__name__}.{label}"
        sys.stderr.write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z

6104 

6105 

# convenience constants for positional expressions
# (each is a single module-level instance of the corresponding expression
# class, given a readable name via set_name)
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

6112 

# Grammar used by srange() to parse regex-style ``[...]`` character sets.

# backslash-escaped punctuation: yields the character after the backslash
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# ``\x##`` / ``\X##`` (or legacy ``\0x##``) hex escape: yields the character
# with that code point.  The hex digits are everything after the x marker;
# this is extracted by position rather than with ``lstrip(r"\0x")``, which
# strips a *set* of characters and therefore also ate leading "0" digits
# (``\x00`` became ``""``) and left an uppercase ``X`` in place.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# ``\0##`` octal escape: yields the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one character of the set: an escape, or a literal other than ``\`` or ``]``
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the complete bracket expression: "[", optional "^", body items, "]"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6132 

6133 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be parsed or expanded, an empty string is returned.
    """

    def _chars_of(item):
        # plain characters pass through; grouped [first, last] pairs expand
        # to every character in the inclusive range
        if not isinstance(item, ParseResults):
            yield item
            return
        for code in range(ord(item[0]), ord(item[1]) + 1):
            yield chr(code)

    try:
        body_items = _reBracketExpr.parse_string(s).body
        return "".join([ch for item in body_items for ch in _chars_of(item)])
    except Exception:
        return ""

6173 

6174 

def token_map(func, *args) -> ParseAction:
    """Helper that builds a parse action applying a function to every
    element of a :class:`ParseResults` list. Any additional args are
    forwarded to the given function after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (or after its class,
    when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # name the action after the mapped callable, falling back to its class name
    class_name = func.__class__.__name__
    action_name = getattr(func, "__name__", class_name)

    def pa(s, l, t):
        return [func(token, *args) for token in t]

    pa.__name__ = action_name
    return pa

6219 

6220 

def autoname_elements() -> None:
    """
    Utility for mass-naming parser elements, for generating a railroad
    diagram with named subdiagrams: every :class:`ParserElement` found among
    the caller's local variables that has no custom name yet is named after
    its variable.
    """

    # sys._getframe may not be implemented in the current Python; fall back
    # to a stand-in that reports "no frame"
    caller = getattr(sys, "_getframe", lambda _: None)(1)
    if caller is None:
        return

    caller = typing.cast(types.FrameType, caller)
    for var_name, value in caller.f_locals.items():
        # skip anything that is not a parser element, or already has a custom name
        if not isinstance(value, ParserElement) or value.customName:
            continue
        value.set_name(var_name)

6239 

6240 

# Expression for a string enclosed in double quotes. The regex matches the
# opening quote and the body; the closing quote is matched by the trailing
# '"' literal. Inside the body the regex accepts:
#   - any character except '"', newline, carriage return and backslash,
#   - a doubled quote (""),
#   - a backslash followed by one non-"x" character, or by "x" plus one or
#     more hex digits.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

6244 

# Expression for a string enclosed in single quotes. The regex matches the
# opening quote and the body; the closing quote is matched by the trailing
# "'" literal. Inside the body the regex accepts:
#   - any character except "'", newline, carriage return and backslash,
#   - a doubled quote (''),
#   - a backslash followed by one non-"x" character, or by "x" plus one or
#     more hex digits.
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

6248 

# Expression accepting either quoting style. It is built from fresh Regex
# objects that use the same patterns as dbl_quoted_string and
# sgl_quoted_string, but the alternatives carry their own, shorter names
# ("double quoted string" / "single quoted string"). The double-quoted
# alternative is listed first in the "|" alternation.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

6257 

# Expression for a Python-style string literal in any of four forms, combined
# with "^": triple double quoted, triple single quoted, double quoted and
# single quoted.
#
# Triple-quoted bodies accept any character except the quote character and
# backslash, one or two quote characters that do not complete a closing
# triple quote, or a backslash followed by any character.
#
# The triple-quoted patterns are compiled with re.DOTALL so that the "\\."
# escape branch also matches a backslash followed by a newline (a line
# continuation inside a triple-quoted string). re.MULTILINE would not help
# here: it only changes the meaning of "^" and "$", and neither appears in
# these patterns.
#
# The single-line forms accept any character except the quote character,
# newline, carriage return and backslash; a backslash-escaped quote; or a
# backslash followed by one non-"x" character or by "x" plus hex digits.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.DOTALL) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.DOTALL) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

6272 

# A quoted string prefixed with a literal "u". A copy of quoted_string is
# used, so this expression does not share the quoted_string object itself.
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")

6274 

6275 

# Character sets built by srange from ranges written in its "\0x##" hex
# notation. alphas8bit covers 0xc0-0xff except 0xd7 and 0xf7 (the
# multiplication and division signs in Latin-1); punc8bit covers 0xa1-0xbf
# plus exactly those two code points.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6278 

# Build the list of built-in expressions, for future reference if a global
# default value gets updated.
# vars() is evaluated here at module level, so the list is a snapshot of the
# ParserElement instances bound in the module namespace up to this point;
# anything defined below this statement is not included.
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6284 

# Compatibility synonyms: pre-PEP8 camelCase names kept alongside the
# snake_case ones. The expression names below are plain aliases bound to the
# very same objects. The function names are created by replaced_by_pep8
# (imported from .util), which is given the old name and the new function --
# presumably to return a compatibility wrapper; confirm in .util.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on