Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pip/_vendor/pyparsing/core.py: 49%

2416 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:48 +0000

1# 

2# core.py 

3# 

4import os 

5import typing 

6from typing import ( 

7 NamedTuple, 

8 Union, 

9 Callable, 

10 Any, 

11 Generator, 

12 Tuple, 

13 List, 

14 TextIO, 

15 Set, 

16 Sequence, 

17) 

18from abc import ABC, abstractmethod 

19from enum import Enum 

20import string 

21import copy 

22import warnings 

23import re 

24import sys 

25from collections.abc import Iterable 

26import traceback 

27import types 

28from operator import itemgetter 

29from functools import wraps 

30from threading import RLock 

31from pathlib import Path 

32 

33from .util import ( 

34 _FifoCache, 

35 _UnboundedCache, 

36 __config_flags, 

37 _collapse_string_to_ranges, 

38 _escape_regex_range_chars, 

39 _bslash, 

40 _flatten, 

41 LRUMemo as _LRUMemo, 

42 UnboundedMemo as _UnboundedMemo, 

43) 

44from .exceptions import * 

45from .actions import * 

46from .results import ParseResults, _ParseResultsWithOffset 

47from .unicode import pyparsing_unicode 

48 

49_MAX_INT = sys.maxsize 

50str_type: Tuple[type, ...] = (str, bytes) 

51 

52# 

53# Copyright (c) 2003-2022 Paul T. McGuire 

54# 

55# Permission is hereby granted, free of charge, to any person obtaining 

56# a copy of this software and associated documentation files (the 

57# "Software"), to deal in the Software without restriction, including 

58# without limitation the rights to use, copy, modify, merge, publish, 

59# distribute, sublicense, and/or sell copies of the Software, and to 

60# permit persons to whom the Software is furnished to do so, subject to 

61# the following conditions: 

62# 

63# The above copyright notice and this permission notice shall be 

64# included in all copies or substantial portions of the Software. 

65# 

66# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

67# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

68# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

69# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

70# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

71# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

72# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

73# 

74 

75 

if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """
        Minimal stand-in for ``functools.cached_property`` on older interpreters.

        The wrapped function runs once per instance; its result is stored in the
        instance ``__dict__`` under the function's name, so later attribute
        lookups find the stored value before reaching this descriptor.
        """

        def __init__(self, func):
            self._func = func

        def __get__(self, instance, owner=None):
            # Accessed on the class itself: hand back the descriptor, as
            # functools.cached_property does, instead of failing on
            # ``None.__dict__``.
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret

87 

88 

class __compat__(__config_flags):
    """
    Compatibility switches for pyparsing features scheduled for a future
    release. Setting a switch to ``True`` turns that feature on in an earlier
    version, which helps with compatibility development and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public name defined so far in this class body
    _all_names = [_nm for _nm in locals() if not _nm.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]

110 

111 

class __diag__(__config_flags):
    # Global diagnostic switches; every flag starts out disabled.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # public flag names defined above, split by their naming prefix
    _all_names = [_nm for _nm in locals() if not _nm.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag whose name starts with ``warn``."""
        for warning_flag in cls._warning_names:
            cls.enable(warning_flag)

132 

133 

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - warning flag; presumably concerns
      combining the ``'<<'`` operator with a :class:`MatchFirst` (TODO confirm exact trigger)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Use :class:`enable_diag` and :class:`disable_diag` to switch individual
    diagnostics on or off; :class:`enable_all_warnings` turns on all warnings.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

165 

166 

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag (see :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

172 

173 

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag (see :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

179 

180 

def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning (see :class:`Diagnostics`).
    """
    # delegates to the classmethod on the flag holder
    __diag__.enable_all_warnings()

186 

187 

188# hide abstract class 

189del __config_flags 

190 

191 

def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether pyparsing warnings should be switched on at import time.

    The starting answer is the truthiness of ``warn_env_var``. Each entry of
    ``cmd_line_warn_options`` (colon-separated ``action:message:category:module:line``
    fields, missing ones treated as empty) can then override it, in order:

    - an action starting with ``i`` and a module field of ``"pyparsing"`` or
      empty switches warnings off
    - any other action switches them on when the module field is
      ``"pyparsing"``, or when message, category and module are all empty
    """
    enabled = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with colons so the first four fields always exist
        fields = (option + "::::").split(":")
        action, message, category, module = fields[:4]
        if action.lower().startswith("i"):
            if module in ("pyparsing", ""):
                enabled = False
        elif module == "pyparsing" or not (message or category or module):
            enabled = True
    return enabled

207 

208 

# Honour the interpreter's -W options and the PYPARSINGENABLEALLWARNINGS
# environment variable when this module is imported.
if _should_enable_warnings(sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")):
    enable_all_warnings()

213 

214 

# Builtins that take a single argument and can be used directly as parse
# actions; _trim_arity calls these with just the tokens.
_single_arg_builtins = {
    sum, len, sorted, reversed,
    list, tuple, set,
    any, all, min, max,
}

229 

_generatorType = types.GeneratorType

# A parse action takes the trailing 0-3 arguments of (s, loc, toks).
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# A parse condition has the same call shapes, but returns a bool.
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: (s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug "try" hook: (instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# debug "match" hook: (instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# debug "fail" hook: (instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

249 

250 

# Character sets exported for building grammars.
nums = string.digits
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
# every printable ASCII character that is not whitespace
printables = "".join(filter(lambda c: c not in string.whitespace, string.printable))

258 

_trim_arity_call_line: traceback.StackSummary = None


def _trim_arity(func, max_limit=3):
    """
    Wrap ``func`` so it can always be called as ``wrapper(s, loc, toks)``.

    The wrapper first calls ``func`` with all arguments; each time that call
    itself raises ``TypeError`` it drops one more leading argument, up to
    ``max_limit`` dropped arguments. Once a call succeeds the arity is
    considered found, and later ``TypeError``s propagate unchanged.
    Builtins listed in ``_single_arg_builtins`` are simply called with the
    tokens.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    def extract_tb(tb, limit=0):
        # (filename, lineno) of the last frame within the extracted window
        innermost = traceback.extract_tb(tb, limit=limit)[-1]
        return [innermost[:2]]

    # Work out, once, which (file, line) the call to the user's parse action
    # below lives on, so wrapper() can recognise a TypeError raised by that
    # call itself without calling extract_stack at parse time.

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while True:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # a TypeError after the arity is known is the user's own error
                if found_arity:
                    raise
                tb = te.__traceback__
                trim_arity_type_error = (
                    extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth
                )
                del tb

                # raised by our own call: retry with one argument fewer
                if trim_arity_type_error and limit < max_limit:
                    limit += 1
                    continue

                raise
    # fmt: on

    # Give the wrapper the target's name and docstring for readable debug output
    # (functools.wraps is avoided because it interferes with the signature).
    fallback_name = getattr(func, "__class__").__name__
    wrapper.__name__ = getattr(func, "__name__", fallback_name)
    wrapper.__doc__ = func.__doc__

    return wrapper

320 

321 

def condition_as_parse_action(
    fn: ParseCondition, message: str = None, fatal: bool = False
) -> ParseAction:
    """
    Turn a predicate returning ``True`` or ``False`` into a parse action that
    raises when the predicate fails. Useful where a parse action is required
    and :class:`ParserElement.add_condition` cannot be used (such as when adding a condition
    to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - custom message for the raised exception
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise raise :class:`ParseException`

    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message
    exc_type = ParseException
    if fatal:
        exc_type = ParseFatalException
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        passed = bool(fn(s, l, t))
        if not passed:
            raise exc_type(s, l, msg)

    return pa

348 

349 

def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Print a message that ``expr`` is about to be matched at ``loc``, followed
    by the current input line and a caret under the current column. The
    message is prefixed with ``*`` when ``cache_hit`` is true.
    """
    marker = "*" if cache_hit else ""
    row = lineno(loc, instring)
    column = col(loc, instring)
    text = line(loc, instring)
    caret_pad = " " * (column - 1)
    print(
        "{}Match {} at loc {}({},{})\n {}\n {}^".format(
            marker, expr, loc, row, column, text, caret_pad
        )
    )

367 

368 

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Print the expression and the tokens it matched (as a list), prefixed with
    ``*`` when ``cache_hit`` is true.
    """
    marker = "*" if cache_hit else ""
    matched = toks.as_list()
    print("{}Matched {} -> {}".format(marker, expr, matched))

379 

380 

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: "ParserElement",
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Print the expression that failed together with the type name and text of
    the exception, prefixed with ``*`` when ``cache_hit`` is true.
    """
    marker = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print("{}Match {} failed, {} raised: {}".format(marker, expr, exc_name, exc))

394 

395 

def null_debug_action(*args):
    """No-op debug action: accepts any arguments and does nothing, so that debugging output is suppressed during parsing."""

398 

399 

400class ParserElement(ABC): 

401 """Abstract base level parser element class.""" 

402 

403 DEFAULT_WHITE_CHARS: str = " \n\t\r" 

404 verbose_stacktrace: bool = False 

405 _literalStringClass: typing.Optional[type] = None 

406 

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of characters skipped as whitespace.

    Example::

        # default whitespace chars are space, newline, <TAB> and carriage return
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # skip only space and <TAB>, so that newline becomes significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # expressions defined in this module that follow the default get the new set too
    following_default = [e for e in _builtin_exprs if e.copyDefaultWhiteChars]
    for builtin in following_default:
        builtin.whiteChars = set(chars)

427 

@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class used when plain string literals are included in a parser.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    # stored on the base class
    ParserElement._literalStringClass = cls

449 

class DebugActions(NamedTuple):
    # Optional callbacks used when debugging an expression:
    # called before a match attempt, ...
    debug_try: typing.Optional[DebugStartAction]
    # ... after a successful match, ...
    debug_match: typing.Optional[DebugSuccessAction]
    # ... and after a failed match.
    debug_fail: typing.Optional[DebugExceptionAction]

454 

def __init__(self, savelist: bool = False):
    """Set up default parsing state; ``savelist`` is stored as ``saveAsList``."""
    # callbacks
    self.parseAction: List[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    # naming
    self.customName = None
    self._defaultName = None
    self.resultsName = None
    self.saveAsList = savelist
    # whitespace handling; whiteChars starts as a copy of the class default
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs: List["ParserElement"] = []
    self.debug = False
    self.streamlined = False
    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""
    # results names are modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions, none set initially
    self.debugActions = self.DebugActions(None, None, None)
    # avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
    self.suppress_warnings_: List[Diagnostics] = []

482 

def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
    """
    Record ``warning_type`` as suppressed for this expression and return the
    expression itself.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parseString("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self

498 

def copy(self) -> "ParserElement":
    """
    Return a copy of this :class:`ParserElement`. The copy gets its own lists
    of parse actions and ignore expressions, so the same parsing pattern can
    be given different parse actions.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # the shallow copy shares these lists with the original; give it its own
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        # re-read the current class default
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate

527 

def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> "ParserElement":
    """
    Give the matched tokens a name under which they can be read as a nested
    attribute of the returned parse results.

    Results names behave like dict keys by default: a later value overwrites
    an earlier one. Pass ``list_all_matches`` = True to keep every value
    captured under the name.

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object,
    so a basic element such as an integer can be defined once and used in
    several places under different names.

    The abbreviated syntax ``expr("name")`` is equivalent to
    ``expr.set_results_name("name")`` - see :class:`__call__`. Use
    ``expr("name*")`` when ``list_all_matches`` is required.

    Example::

        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # either spelling of the keyword enables list-all behaviour
    return self._setResultsName(name, listAllMatches or list_all_matches)

560 

def _setResultsName(self, name, listAllMatches=False):
    """
    Return a copy of this element carrying the results name ``name``.

    ``None`` returns ``self`` unchanged. A trailing ``*`` is removed from the
    name and forces list-all-matches behaviour.
    """
    if name is None:
        return self
    named = self.copy()
    keep_all = listAllMatches
    if name.endswith("*"):
        name, keep_all = name[:-1], True
    named.resultsName = name
    named.modalResults = not keep_all
    return named

571 

def set_break(self, break_flag: bool = True) -> "ParserElement":
    """
    Enter the Python pdb debugger whenever this element is about to be
    parsed. Pass ``break_flag`` = ``True`` to enable, ``False`` to disable.
    """
    if not break_flag:
        # restore the wrapped parse method, if a breaker is installed
        current = self._parse
        if hasattr(current, "_originalParseMethod"):
            self._parse = current._originalParseMethod
        return self

    wrapped_parse = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb

        # this call to pdb.set_trace() is intentional, not a checkin error
        pdb.set_trace()
        return wrapped_parse(instring, loc, doActions, callPreParse)

    # remembered so that set_break(False) can undo the wrapping
    breaker._originalParseMethod = wrapped_parse
    self._parse = breaker
    return self

594 

def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Replace this expression's parse actions with ``fns``, to be run when the
    expression matches successfully.

    Parse actions can convert data, perform extra validation, update external
    data structures, or enhance or replace the parsed tokens. Each parse
    action ``fn`` is a callable with 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - s = the original string being parsed (see note below)
    - loc = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens arrive as ParseResults and may be modified in place: list-style
    append, extend, and pop update the parsed list elements, and dict-style
    item set and del add, update, or remove named results. Tokens modified in
    place do not need to be returned.

    A parse action may also replace the tokens entirely by returning another
    ``ParseResults`` or some entirely different object (common for data
    conversions). A convenient way to build a new result is to collect the
    values in a dict and return :class:`ParseResults.from_dict`.

    Passing None as the only ``fn`` clears all parse actions previously added
    to this expression.

    Optional keyword arguments:

    - call_during_try = (default= ``False``) indicate if parse action should be run during
      lookaheads and alternate testing. Parse actions with side effects should
      only run once the parse is known to be succeeding; parse actions that
      perform additional validation should pass True so that the validation is
      part of the preliminary "try" parses.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    if list(fns) == [None]:
        # a lone None clears the actions
        self.parseAction = []
        return self

    for candidate in fns:
        if not callable(candidate):
            raise TypeError("parse actions must be callable")
    self.parseAction = [_trim_arity(fn) for fn in fns]
    # the snake_case keyword wins over the legacy camelCase spelling
    legacy_flag = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_flag)
    return self

680 

def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Append one or more parse actions to those already set on this expression.
    See :class:`set_parse_action`.

    See examples in :class:`copy`.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction.extend(wrapped)
    # once set, callDuringTry stays set
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get(
            "call_during_try", kwargs.get("callDuringTry", False)
        )
    return self

692 

def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Append boolean predicate functions to this expression's parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    each function passed to ``add_condition`` must return whether the condition passed.

    Optional keyword arguments:

    - message = define a custom message to be used in the raised exception
    - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - call_during_try = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    failure_message = kwargs.get("message")
    is_fatal = kwargs.get("fatal", False)
    for predicate in fns:
        action = condition_as_parse_action(
            predicate, message=failure_message, fatal=is_fatal
        )
        self.parseAction.append(action)

    # once set, callDuringTry stays set
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get(
            "call_during_try", kwargs.get("callDuringTry", False)
        )
    return self

727 

def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
    """
    Set the action to run when parsing fails at this expression.
    The fail action ``fn`` is a callable taking the arguments
    ``fn(s, loc, expr, err)`` where:

    - s = string being parsed
    - loc = location where expression match was attempted and failed
    - expr = the parse expression that failed
    - err = the exception thrown

    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately."""
    self.failAction = fn
    return self

743 

def _skipIgnorables(self, instring, loc):
    """
    Advance ``loc`` past every match of the expressions in ``ignoreExprs``.

    The whole list is retried until a full pass matches nothing, since
    skipping one ignorable can expose another. Returns the new location.
    """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # consume consecutive matches until the expression fails
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    progressed = True
            except ParseException:
                pass
    return loc

756 

def preParse(self, instring, loc):
    """
    Return the location at which parsing should start: ``loc`` moved past
    any ignorable expressions and then, if whitespace skipping is on, past
    any leading characters found in ``whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc

768 

def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: consume nothing and return ``loc`` with an empty token list."""
    no_tokens = []
    return loc, no_tokens

771 

def postParse(self, instring, loc, tokenlist):
    """Base implementation: return ``tokenlist`` unchanged."""
    unchanged = tokenlist
    return unchanged

774 

775 # @profile 

def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Parse ``instring`` at ``loc`` without using the packrat cache.

    Steps: optional ``preParse``, then ``parseImpl``, then ``postParse``; the
    tokens are wrapped in a :class:`ParseResults` and the parse actions are
    run when ``doActions`` or ``callDuringTry`` is set. Debug actions and
    the fail action are called along the way when configured.

    Returns ``(new_loc, results)``. An ``IndexError`` from ``parseImpl`` is
    converted to :class:`ParseException`; an ``IndexError`` from a parse
    action is re-raised as a chained :class:`ParseException`.
    """
    debugging = self.debug  # and doActions)
    len_instring = len(instring)
    # Bound up front so the failure handlers below can always report a
    # location, even when preParse itself raises.
    tokens_start = loc

    if debugging or self.failAction:
        # slower path: failures must be reported to debug/fail actions
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action returning a new object replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a parse action returning a new object replaces the results
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

877 

def try_parse(self, instring: str, loc: int, raise_fatal: bool = False) -> int:
    """
    Attempt a parse at ``loc`` with ``doActions=False`` and return the
    location after the match.

    A :class:`ParseFatalException` is re-raised when ``raise_fatal`` is true;
    otherwise it is replaced by a :class:`ParseException` at ``loc``.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    else:
        return outcome[0]

885 

def can_parse_next(self, instring: str, loc: int) -> bool:
    """
    Return ``True`` if :meth:`try_parse` succeeds at ``loc``, ``False`` if it
    raises ``ParseException`` or ``IndexError``.
    """
    try:
        self.try_parse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True

893 

# cache for left-recursion in Forward references
# (replaced with a memo object by enable_left_recursion(); cleared by reset_cache())
recursion_lock = RLock()
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
] = {}

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = (
    {}
)  # this is set later by enable_packrat(); this is here so that reset_cache() doesn't fail
packrat_cache_lock = RLock()
# [hit count, miss count]; indexed with HIT=0 / MISS=1 in _parseCache
packrat_cache_stats = [0, 0]

906 

907 # this method gets repeatedly called during backtracking with the same arguments - 

908 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 

def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Memoizing wrapper around ``_parseNoCache``.

    Results are keyed on ``(self, instring, loc, callPreParse, doActions)`` in
    ``ParserElement.packrat_cache``. Both successful results and
    ``ParseBaseException`` failures are cached; a cached failure is re-raised
    on a hit. The whole lookup/parse runs under ``packrat_cache_lock``.
    """
    # indexes into ParserElement.packrat_cache_stats
    HIT, MISS = 0, 1
    # NOTE(review): TRY/MATCH/FAIL are not referenced anywhere in this method
    TRY, MATCH, FAIL = 0, 1, 2
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store a copy of the tokens, plus the location the parse started from
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)
                except TypeError:
                    # presumably tolerates debug callbacks that do not accept
                    # the cache_hit keyword - TODO confirm
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True
                        )
                    except TypeError:
                        pass
                raise value

            # hand back a fresh copy so callers cannot mutate the cached tokens
            # NOTE(review): value[2] was stored from the starting ``loc`` above but is
            # named ``endloc`` here and passed after ``loc_`` to debug_match - confirm
            # the intended start/end argument order
            loc_, result, endloc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    self.debugActions.debug_match(
                        instring, loc_, endloc, self, result, cache_hit=True
                    )
                except TypeError:
                    pass

            return loc_, result

956 

# default parse entry point: no memoization. enable_packrat() rebinds this to
# _parseCache, and disable_memoization() restores _parseNoCache.
_parse = _parseNoCache

958 

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero every packrat statistics counter in place,
    and empty the left-recursion memo table.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so the same list object is kept and only its contents reset
    stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()

966 

# memoization mode flags; enable_packrat() and enable_left_recursion() each set
# their own flag and refuse to run (unless force=True) while the other is set
_packratEnabled = False
_left_recursion_enabled = False

969 

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing and drop their memoized data.

    Calling this when neither mode is active is harmless, so it can be used
    to clear any earlier settings before enabling Packrat or Left Recursion.
    """
    ParserElement.reset_cache()
    # targets are assigned left to right: left-recursion flag, then packrat flag
    ParserElement._left_recursion_enabled = ParserElement._packratEnabled = False
    # route parsing back through the non-caching implementation
    ParserElement._parse = ParserElement._parseNoCache

983 

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Enables "bounded recursion" parsing, which allows for both direct and indirect
    left-recursion. During parsing, left-recursive :class:`Forward` elements are
    repeatedly matched with a fixed recursion depth that is gradually increased
    until finding the longest match.

    Example::

        from pip._vendor import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    Recursion search naturally memoizes matches of ``Forward`` elements and may
    thus skip reevaluation of parse actions during backtracking. This may break
    programs with parse actions which rely on strict ordering of side-effects.

    Parameters:

    - cache_size_limit - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements. Values that are not positive raise
      ``NotImplementedError``.
    - force - (default=``False``) - call :meth:`disable_memoization` first
      instead of raising ``RuntimeError`` when Packrat is already enabled.

    Bounded Recursion parsing works similar but not identical to Packrat parsing,
    thus the two cannot be used together.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    # choose the memo implementation before touching any class state
    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError("Memo size of %s" % cache_size_limit)

    ParserElement.recursion_memos = memo
    ParserElement._left_recursion_enabled = True

1031 

@staticmethod
def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location can then return a
    cached value instead of re-executing parsing/validating code. Both valid
    results and parsing exceptions are memoized.

    Parameters:

    - cache_size_limit - (default= ``128``) - an integer bounds the packrat
      cache (a ``_FifoCache`` of that size is installed); ``None`` installs
      an ``_UnboundedCache``.
    - force - (default= ``False``) - call :meth:`disable_memoization` first
      instead of raising ``RuntimeError`` when Bounded Recursion is enabled.

    If packrat parsing is already enabled (and ``force`` is not given), the
    call leaves the existing cache in place.

    This speedup may break existing programs that use parse actions that
    have side-effects. For this reason, packrat parsing is disabled when
    you first import pyparsing. For best results, call ``enable_packrat()``
    immediately after importing pyparsing.

    Example::

        from pip._vendor import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similar but not identical to Bounded Recursion parsing,
    thus the two cannot be used together.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if ParserElement._packratEnabled:
        # already active: keep the current cache untouched
        return

    ParserElement._packratEnabled = True
    ParserElement.packrat_cache = (
        _UnboundedCache() if cache_size_limit is None else _FifoCache(cache_size_limit)
    )
    ParserElement._parse = ParserElement._parseCache

1075 

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This is the primary
    entry point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    Setting ``parse_all`` is equivalent to ending the grammar with :class:`StringEnd`().

    Unless ``keepTabs`` is set on this element, parsing runs on a copy of the input in which tabs have been
    expanded to spaces (``str.expandtabs()`` with its default tab size). If the input contains tabs and parse
    actions index into the string using ``loc``, keep the views consistent by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_consume_all = parse_all or parseAll

    # every top-level parse starts from empty memo tables
    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if must_consume_all:
            # skip trailing ignorables, then require end of input
            loc = self.preParse(instring, loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    return tokens

1144 

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    ``maxMatches`` is the pre-PEP8 spelling of ``max_matches``; the smaller of
    the two values is used. When ``debug`` is true, a dict with each match's
    tokens, start and end is printed before the match is yielded.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    # honor whichever of the two spellings asks for fewer matches
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every loop iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn(instring, loc)
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character past the pre-parsed position
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.asList(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): ``nextloc`` (lower-case) is only compared, never
                        # assigned to ``loc``; when pre-parsing skipped anything the scan
                        # resumes at the end of the match (``nextLoc``), otherwise it
                        # advances by a single character - confirm this is intended
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past ``loc``: step forward to avoid looping in place
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)

1233 

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matched text using the
    tokens produced by the grammar's parse actions. Attach a parse action that
    modifies the returned token list, then call ``transform_string()`` on a
    target string: unmatched text is copied through, matched text is replaced
    by the (stringified) tokens, and the assembled string is returned.

    Note that this method sets ``keepTabs`` to ``True`` on the element and
    does not reset it afterwards.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # untouched text between the previous match and this one
            pieces.append(instring[prev_end:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces += toks.as_list()
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        # drop falsy fragments before flattening and joining
        kept = [piece for piece in pieces if piece]
        return "".join([str(fragment) for fragment in _flatten(kept)])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1279 

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens of
    every match into a single :class:`ParseResults`. ``max_matches`` (or its
    pre-PEP8 spelling ``maxMatches``; the smaller value wins) clips the search
    after 'n' matches are found.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            toks
            for toks, _start, _end in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1319 

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits ``instring`` wherever this expression matches.

    ``maxsplit`` is forwarded to :class:`scan_string` as its ``max_matches``
    limit. When ``include_separators`` (or its pre-PEP8 spelling
    ``includeSeparators``) is true, the first token of each separator match is
    yielded between the surrounding pieces.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        # text between the previous separator and this one
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator (possibly empty)
    yield instring[cursor:]

1351 

def __add__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator - returns :class:`And`. A string operand
    is first converted with this element's ``_literalStringClass``.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.

        Literal('start') + ... + Literal('end')

    is equivalent to:

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.

    Raises ``TypeError`` if the operand is neither a string nor a
    :class:`ParserElement`.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return And([self, operand])
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1391 

def __radd__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` builds ``SkipTo(expr)("_skipped*") + expr``; a string is
    converted with ``_literalStringClass``; anything else that is not a
    :class:`ParserElement` raises ``TypeError``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand + self
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1408 

def __sub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string operand is converted with ``_literalStringClass``; any other
    non-:class:`ParserElement` operand raises ``TypeError``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        # the error-stop marker sits between the two operands
        return self + And._ErrorStop() + operand
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1422 

def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    The left operand is converted (strings only) and then ``operand - self``
    is evaluated; unsupported operand types raise ``TypeError``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand - self
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1436 

def __mul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` (or ``...``) as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``TypeError`` for unsupported multiplier types and ``ValueError``
    for a negative count or a maximum smaller than the minimum.
    """
    # normalize the Ellipsis shorthands into tuple form first
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        required, extra = other, 0
    elif isinstance(other, tuple):
        # pad/clip to exactly (low, high), treating ``...`` as None
        bounds = tuple(None if item is Ellipsis else item for item in other)
        low, high = (bounds + (None, None))[:2]
        if low is None:
            low = 0
        if not isinstance(low, int) or not (high is None or isinstance(high, int)):
            raise TypeError(
                "cannot multiply ParserElement and ({}) objects".format(
                    ",".join(type(item).__name__ for item in (low, high))
                )
            )
        if high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        required, extra = low, high - low
    else:
        raise TypeError(
            "cannot multiply ParserElement and {} objects".format(type(other).__name__)
        )

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if extra < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == extra == 0:
        return And([])

    if not extra:
        # fixed repetition only
        return self if required == 1 else And([self] * required)

    def nested_optionals(count):
        # builds Opt(self + Opt(self + ...)) nested ``count`` levels deep
        if count > 1:
            return Opt(self + nested_optionals(count - 1))
        return Opt(self)

    if not required:
        return nested_optionals(extra)
    if required == 1:
        return self + nested_optionals(extra)
    return And([self] * required) + nested_optionals(extra)

1522 

def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator when left operand is not a
    :class:`ParserElement`; delegates directly to ``__mul__``.
    """
    repeated = self.__mul__(other)
    return repeated

1525 

def __or__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a ``_PendingSkip`` created with ``must_skip=True``.
    A string operand is converted with ``_literalStringClass``; any other
    non-:class:`ParserElement` operand raises ``TypeError``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return MatchFirst([self, operand])
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1542 

def __ror__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    The left operand is converted (strings only) and then ``operand | self``
    is evaluated; unsupported operand types raise ``TypeError``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand | self
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1556 

def __xor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator - returns :class:`Or`.

    A string operand is converted with ``_literalStringClass``; any other
    non-:class:`ParserElement` operand raises ``TypeError``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return Or([self, operand])
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1570 

def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    The left operand is converted (strings only) and then ``operand ^ self``
    is evaluated; unsupported operand types raise ``TypeError``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand ^ self
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1584 

def __and__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator - returns :class:`Each`.

    A string operand is converted with ``_literalStringClass``; any other
    non-:class:`ParserElement` operand raises ``TypeError``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return Each([self, operand])
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1598 

def __rand__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    The left operand is converted (strings only) and then ``operand & self``
    is evaluated; unsupported operand types raise ``TypeError``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand & self
    raise TypeError(
        f"Cannot combine element of type {type(operand).__name__} with ParserElement"
    )

1612 

def __invert__(self) -> "ParserElement":
    """
    Implementation of ``~`` operator - wraps this element in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1618 

# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence (this class defines __getitem__ for repetition, so
# iterating an element must fail rather than fall back to indexing)
__iter__ = None

1622 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``s exist in the input stream. If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    Raises ``TypeError`` when more than two index arguments are given.
    """
    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # a non-iterable key ``n`` stands for the range (n, n)
        key = (key, key)

    count = len(key)
    if count > 2:
        overflow = "... [{}]".format(count) if count > 5 else ""
        raise TypeError(
            "only 1 or 2 index arguments supported ({}{})".format(key[:5], overflow)
        )

    # clip to 2 elements
    return self * tuple(key[:2])

1662 

def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    :param name: results name to assign, or ``None`` (the default) to just copy this element
    :returns: the value of ``self._setResultsName(name)`` when a name is given,
      otherwise ``self.copy()``

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    # compare against None explicitly so an empty-string name is still forwarded
    if name is not None:
        return self._setResultsName(name)
    return self.copy()

1682 

def suppress(self) -> "ParserElement":
    """
    Wrap this :class:`ParserElement` in :class:`Suppress` so its output is
    suppressed; useful to keep punctuation from cluttering up returned output.
    """
    quiet = Suppress(self)
    return quiet

1689 

def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on whitespace skipping before this :class:`ParserElement`'s pattern
    is matched, and return ``self`` so calls can be chained.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any).
        This implementation does not read the flag; presumably subclasses that
        contain other elements honor it.
    """
    setattr(self, "skipWhitespace", True)
    return self

1699 

def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off whitespace skipping before this :class:`ParserElement`'s pattern
    is matched, and return ``self`` so calls can be chained. This is normally
    only used internally by the pyparsing module, but may be needed in some
    whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any).
        This implementation does not read the flag; presumably subclasses that
        contain other elements honor it.
    """
    setattr(self, "skipWhitespace", False)
    return self

1710 

def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Overrides the default whitespace chars.

    Enables whitespace skipping, stores ``chars`` as a new ``set`` in
    ``whiteChars``, records ``copy_defaults`` in ``copyDefaultWhiteChars``,
    and returns ``self``.
    """
    self.skipWhitespace = True
    whitespace = set(chars)
    self.whiteChars = whitespace
    self.copyDefaultWhiteChars = copy_defaults
    return self

1721 

def parse_with_tabs(self) -> "ParserElement":
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before
    parsing the input string, by setting ``keepTabs`` on this element.
    Must be called before ``parse_string`` when the input grammar contains
    elements that match ``<TAB>`` characters. Returns ``self``.
    """
    setattr(self, "keepTabs", True)
    return self

1730 

def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in :class:`Suppress`. A :class:`Suppress`
    expression is appended to ``ignoreExprs`` only if it is not already
    there; any other expression is copied, wrapped in :class:`Suppress`, and
    appended unconditionally. Returns ``self``.

    Example::

        patt = Word(alphas)[1, ...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if isinstance(other, Suppress):
        # avoid registering the same suppressed expression twice
        if other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
    else:
        # wrap a copy so the caller's expression is left unmodified
        self.ignoreExprs.append(Suppress(other.copy()))
    return self

1758 

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching.
    Any action passed as a falsy value is replaced by the corresponding
    module default. Debugging is switched on for this element and ``self``
    is returned.

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
    """
    on_try = start_action or _default_start_debug_action
    on_match = success_action or _default_success_debug_action
    on_fail = exception_action or _default_exception_debug_action
    self.debugActions = self.DebugActions(on_try, on_match, on_fail)
    self.debug = True
    return self

1784 

def set_debug(self, flag: bool = True) -> "ParserElement":
    """
    Switch the display of debugging messages during pattern matching
    on (``flag=True``, the default) or off (``flag=False``).

    Switching on installs the default debug actions via
    :class:`set_debug_actions`; switching off only clears the ``debug`` flag.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    This is the output of the default debug actions; custom ones can be
    supplied with :class:`set_debug_actions`. Before each attempt to match
    ``wd`` the message ``"Match <exprname> at loc <n>(<line>,<col>)"`` is
    shown, followed by either a ``"Matched"`` or an ``"Exception raised"``
    message. Naming the expression with :class:`set_name` makes these
    messages easier to read - without it, the default name of the
    :class:`Word` expression above would be ``"W:(A-Za-z)"``.
    """
    if not flag:
        self.debug = False
        return self

    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

1831 

@property
def default_name(self) -> str:
    """
    Auto-generated name of this expression.

    The name is produced by ``_generateDefaultName()`` on first access and
    cached in ``_defaultName`` until that attribute is reset to ``None``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1837 

@abstractmethod
def _generateDefaultName(self):
    """
    Build the text returned by ``default_name``.

    Abstract: every child class must provide its own implementation.
    """

1843 

def set_name(self, name: str) -> "ParserElement":
    """
    Give this expression a custom name, which makes debugging and
    exception messages clearer.

    The error message is rebuilt as ``"Expected <name>"``, and debugging is
    enabled when ``__diag__.enable_debug_on_named_expressions`` is set.

    Returns ``self`` so that calls can be chained.

    Example::
        Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
        Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # read the name back through the property rather than using `name` directly
    expected = "Expected " + self.name
    self.errmsg = expected
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.set_debug()
    return self

1856 

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined name when one has
    been set, otherwise the auto-generated ``default_name``.
    """
    if self.customName is not None:
        return self.customName
    return self.default_name

1861 

def __str__(self) -> str:
    """Return the expression's ``name``."""
    return self.name

1864 

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)

1867 

def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard the cached default
    name so it is regenerated on next use.

    Returns ``self`` so that calls can be chained.
    """
    self.streamlined, self._defaultName = True, None
    return self

1872 

def recurse(self) -> Sequence["ParserElement"]:
    """Return the sub-expressions of this element; this implementation has none."""
    return list()

1875 

def _checkRecursion(self, parseElementList):
    """
    Run the recursion check on every sub-expression returned by
    ``recurse()``, handing each one a new list made of
    ``parseElementList`` followed by this element.
    """
    path_with_self = parseElementList + [self]
    for child in self.recurse():
        child._checkRecursion(path_with_self)

1880 

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite
    recursive definitions.

    The check is started with an empty element list; ``validateTrace`` is
    accepted but not used by this implementation.
    """
    starting_path = []
    self._checkRecursion(starting_path)

1886 

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Run this parse expression over the contents of a file.

    ``file_or_filename`` may be an object with a ``read()`` method or a
    file name. When a name is given, the file is opened with ``encoding``,
    read completely and closed again before parsing starts.

    ``parse_all`` / ``parseAll`` are forwarded to ``parse_string``; the
    flag is on if either of them is true.

    Parse errors are re-raised without the pyparsing-internal traceback
    unless ``ParserElement.verbose_stacktrace`` is set.
    """
    require_full_match = parseAll or parse_all
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no read() available: treat the argument as a file name
        with open(file_or_filename, "r", encoding=encoding) as source:
            text = source.read()

    try:
        return self.parse_string(text, require_full_match)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1914 

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the very same object compares equal;
    - a string compares equal when this expression matches it completely;
    - another ``ParserElement`` compares equal when both have equal
      instance attributes (``vars()``);
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

1923 

def __hash__(self):
    """Hash by object identity."""
    return id(self)

1926 

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Quick check of a parser against a test string. Handy for small inline
    micro-tests of sub-expressions while building up a larger parser.

    Parameters:
    - ``test_string`` - text to test against this expression (converted with ``str()``)
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string`;
      the flag is on only if both ``parse_all`` and ``parseAll`` are true

    Returns ``True`` if parsing succeeds, ``False`` if it raises a
    ``ParseBaseException``.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    require_full_match = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_full_match)
    except ParseBaseException:
        return False
    else:
        return True

1949 

def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) writes test output to ``file``
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword arguments are the legacy spellings of the
    snake_case ones; each pair is combined into a single setting.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` pairs in which ``result`` is either the parsed
    :class:`ParseResults` or the exception that was raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each snake_case argument with its legacy camelCase twin
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        # one test per line, stripped of surrounding whitespace
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, str_type):
        comment = Literal(comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults = []
    comments = []
    success = True
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    for t in tests:
        # comment lines (and blank lines inside a comment run) are buffered
        # and emitted in front of the next real test
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            "\n" + "\n".join(comments) if comments else "",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            out.append(pe.explain())
            out.append("FAIL: " + str(pe))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: {}: {}".format(type(exc).__name__, exc))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # callback produced nothing: show the parse results,
                        # honoring full_dump like every other dump below
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # not every callable has __name__ (e.g. functools.partial)
                    out.append(
                        "{} failed: {}: {}".format(
                            getattr(postParse, "__name__", repr(postParse)),
                            type(e).__name__,
                            e,
                        )
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2144 

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser and write it out as HTML.

    Parameters:
    - output_html (str, Path or file-like object) - output target for the
      generated diagram HTML; a str or Path is opened for writing as UTF-8,
      anything else is written to directly
    - vertical (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - show_results_names - bool flag whether diagram should show annotations for
      defined results names
    - show_groups - bool flag whether groups should be highlighted with an unlabeled surrounding box
    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    The expression is streamlined before the diagram is built.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    diagram_options = dict(
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )
    railroad = to_railroad(self, **diagram_options)

    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad))
        return

    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(railroad_to_html(railroad))

2190 

# camelCase aliases for the snake_case names defined above

# configuration
setDefaultWhitespaceChars = set_default_whitespace_chars
inlineLiteralsUsing = inline_literals_using
setWhitespaceChars = set_whitespace_chars
parseWithTabs = parse_with_tabs
ignoreWhitespace = ignore_whitespace
leaveWhitespace = leave_whitespace

# results names, parse actions and conditions
setResultsName = set_results_name
setBreak = set_break
setParseAction = set_parse_action
addParseAction = add_parse_action
addCondition = add_condition
setFailAction = set_fail_action

# parsing entry points
tryParse = try_parse
canParseNext = can_parse_next
parseString = parse_string
scanString = scan_string
searchString = search_string
transformString = transform_string
parseFile = parse_file
runTests = run_tests

# caching
resetCache = reset_cache
enableLeftRecursion = enable_left_recursion
enablePackrat = enable_packrat

# naming and debugging
setDebugActions = set_debug_actions
setDebug = set_debug
defaultName = default_name
setName = set_name

2218 

2219 

class _PendingSkip(ParserElement):
    # Internal placeholder that holds the place where '...' was added to a
    # parser element; once another ParserElement is added, the placeholder
    # is replaced with a SkipTo.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self):
        # render "anchor + Empty()" and show the Empty part as "..."
        rendered = str(self.anchor + Empty())
        return rendered.replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        # Replace the pending '...' with a SkipTo aimed at `other`.
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # drop the skipped entry when it is missing or just an empty string
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # last skipped entry is empty: report the anchor as missing instead
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a placeholder can never be parsed with directly
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2259 

2260 

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base
    for atomic matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self):
        # the default name is simply the concrete class name
        cls = type(self)
        return cls.__name__

2271 

2272 

class Empty(Token):
    """
    A token that matches nothing at all, and therefore always matches.
    """

    def __init__(self):
        super().__init__()
        self.mayReturnEmpty, self.mayIndexError = True, False

2282 

2283 

class NoMatch(Token):
    """
    A token that never matches: every parse attempt raises
    ``ParseException``.
    """

    def __init__(self):
        super().__init__()
        self.mayReturnEmpty, self.mayIndexError = True, False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, doActions=True):
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2297 

2298 

class Literal(Token):
    """
    Token that matches one specific string exactly.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        # the legacy keyword spelling wins when it is given
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            raise ValueError("null string passed to Literal; use Empty() instead")
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.mayIndexError = False

        # Performance tuning: plain Literal instances (not subclasses) with a
        # one-character match string switch __class__ to a variant whose
        # parseImpl only does the single-character check
        if type(self) is Literal and self.matchLen == 1:
            self.__class__ = _SingleCharLiteral

    def _generateDefaultName(self):
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character test before the full startswith comparison
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2342 

2343 

class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl compares a single character only.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2349 

2350 

# Register Literal as ParserElement's literal-string class.
ParserElement._literalStringClass = Literal

2352 

2353 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must not be immediately preceded or followed by a keyword
    character. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` (legacy spelling ``identChars``) is a string of
      characters that would be valid identifier characters, defaulting to
      all alphanumerics + "_" and "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # legacy camelCase keywords take precedence when given
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = "Expected {} {}".format(type(self).__name__, self.name)
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare in upper case: both the keyword and the identifier
            # characters are stored upper-cased
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self):
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at ``loc``.

        Returns ``(new_loc, keyword)`` when the text matches and is neither
        preceded nor followed by a keyword character; otherwise raises
        ``ParseException``, pointing at the offending neighbor character
        when the text matched but a word boundary was missing.
        """
        errmsg = self.errmsg
        errloc = loc
        if self.caseless:
            if instring[loc : loc + self.matchLen].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen].upper() not in self.identChars
                    ):
                        return loc + self.matchLen, self.match
                    else:
                        # followed by keyword char
                        errmsg += (
                            ", keyword was immediately followed by keyword character"
                        )
                        errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        else:
            if (
                instring[loc] == self.firstMatchChar
                and self.matchLen == 1
                or instring.startswith(self.match, loc)
            ):
                if loc == 0 or instring[loc - 1] not in self.identChars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[loc + self.matchLen] not in self.identChars
                    ):
                        return loc + self.matchLen, self.match
                    else:
                        # followed by keyword char
                        errmsg += (
                            ", keyword was immediately followed by keyword character"
                        )
                        errloc = loc + self.matchLen
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    setDefaultKeywordChars = set_default_keyword_chars

2470 

2471 

class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the matched results are always reported in the case of the
    defining match string, NOT in the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        defining_text = matchString or match_string
        # the base class stores the upper-cased form for comparisons
        super().__init__(defining_text.upper())
        # Preserve the defining literal.
        self.returnString = defining_text
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString

2497 

2498 

class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # legacy camelCase keywords take precedence when given
        chosen_ident_chars = identChars or ident_chars
        keyword_text = matchString or match_string
        super().__init__(keyword_text, chosen_ident_chars, caseless=True)

2522 

2523 

class CloseMatch(Token):
    """A variation on :class:`Literal` which accepts "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: int = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # the positional/snake_case limit wins whenever it is supplied
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected {!r} (with up to {} mismatches)".format(
            self.match_string, self.maxMismatches
        )
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self):
        return "{}:{!r}".format(type(self).__name__, self.match_string)

    def parseImpl(self, instring, loc, doActions=True):
        start = loc
        target = self.match_string
        end = start + len(target)

        # only attempt a comparison when enough input is left
        if end <= len(instring):
            limit = self.maxMismatches
            fold_case = self.caseless
            mismatches = []
            offset = 0

            for offset, (src, mat) in enumerate(zip(instring[loc:end], target)):
                if fold_case:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(offset)
                    if len(mismatches) > limit:
                        # too many differences: fall through to the exception
                        break
            else:
                # loop finished without exceeding the mismatch limit
                loc = start + offset + 1
                results = ParseResults([instring[start:loc]])
                results["original"] = target
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2611 

2612 

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        """Build the character sets and, when possible, an equivalent regex.

        The camelCase keyword arguments are the legacy spellings; when one
        is given (truthy) it takes precedence over its snake_case twin.

        Raises ``ValueError`` if no initial characters are given or if
        ``min`` is less than 1.
        """
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                "invalid {}, initChars cannot be empty string".format(
                    type(self).__name__
                )
            )

        initChars = set(initChars)
        # self.initChars shares the set object, so the in-place exclusion
        # below is reflected in the attribute as well
        self.initChars = initChars
        if excludeChars:
            excludeChars = set(excludeChars)
            initChars -= excludeChars
            if bodyChars:
                bodyChars = set(bodyChars) - excludeChars
        self.initCharsOrig = "".join(sorted(initChars))

        if bodyChars:
            self.bodyCharsOrig = "".join(sorted(bodyChars))
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = "".join(sorted(initChars))
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # see if we can make a regex for this Word
        if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0):
            if self.bodyChars == self.initChars:
                if max == 0:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    repeat = "{{{},{}}}".format(
                        self.minLen, "" if self.maxLen == _MAX_INT else self.maxLen
                    )
                self.reString = "[{}]{}".format(
                    _collapse_string_to_ranges(self.initChars),
                    repeat,
                )
            elif len(self.initChars) == 1:
                if max == 0:
                    repeat = "*"
                else:
                    repeat = "{{0,{}}}".format(max - 1)
                self.reString = "{}[{}]{}".format(
                    re.escape(self.initCharsOrig),
                    _collapse_string_to_ranges(self.bodyChars),
                    repeat,
                )
            else:
                if max == 0:
                    repeat = "*"
                elif max == 2:
                    # one leading char plus at most one body char; the body
                    # char must stay optional because min is 1 in this branch
                    repeat = "?"
                else:
                    repeat = "{{0,{}}}".format(max - 1)
                self.reString = "[{}][{}]{}".format(
                    _collapse_string_to_ranges(self.initChars),
                    _collapse_string_to_ranges(self.bodyChars),
                    repeat,
                )
            if self.asKeyword:
                self.reString = r"\b" + self.reString + r"\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl below
                self.re = None
            else:
                self.re_match = self.re.match
                # switch this instance to the regex-backed implementation
                self.__class__ = _WordRegex

    def _generateDefaultName(self):
        """Return a compact ``W:(...)`` description of this Word."""

        def charsAsStr(s):
            # abbreviate long character sets so names stay readable
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)
            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."
            else:
                return s

        if self.initChars != self.bodyChars:
            base = "W:({}, {})".format(
                charsAsStr(self.initChars), charsAsStr(self.bodyChars)
            )
        else:
            base = "W:({})".format(charsAsStr(self.initChars))

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single character: drop the leading "W:" prefix
                    return base[2:]
                else:
                    return base + "{{{}}}".format(self.minLen)
            elif self.maxLen == _MAX_INT:
                return base + "{{{},...}}".format(self.minLen)
            else:
                return base + "{{{},{}}}".format(self.minLen, self.maxLen)
        return base

    def parseImpl(self, instring, loc, doActions=True):
        """Match a word at ``loc`` by scanning characters one at a time.

        Returns ``(end_loc, matched_text)`` or raises ``ParseException``.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # an explicit max was given and the word keeps going past it
            throwException = True
        elif self.asKeyword:
            # keyword match: must not be adjacent to other body characters
            if (
                start > 0
                and instring[start - 1] in bodychars
                or loc < instrlen
                and instring[loc] in bodychars
            ):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

2851 

2852 

class _WordRegex(Word):
    """Regex-backed variant of :class:`Word`.

    Instances are expected to carry a ``re_match`` callable (the bound
    ``match`` method of a compiled pattern).
    """

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(end_loc, matched_text)`` or raise ``ParseException``."""
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)

2861 

2862 

class Char(_WordRegex):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        """Set up a one-character match and compile its regex.

        The camelCase keyword arguments are legacy spellings that take
        precedence over the snake_case ones when truthy.
        """
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, asKeyword=keyword_mode, excludeChars=excluded
        )
        # character class built from the (already filtered) initial chars
        self.reString = "[{}]".format(_collapse_string_to_ranges(self.initChars))
        if keyword_mode:
            self.reString = r"\b{}\b".format(self.reString)
        compiled = re.compile(self.reString)
        self.re = compiled
        self.re_match = compiled.match

2888 

2889 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ documentation
        for an explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already compiled object exposing
        ``pattern`` and ``match`` attributes. Raises ``ValueError`` for an
        empty pattern string and ``TypeError`` for any other pattern type.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred to the ``re`` property
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # pick the result style; asMatch wins when both flags are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    @cached_property
    def re(self):
        """Compiled pattern, built on first access.

        Raises ``ValueError`` (chained to the underlying ``re.error``) if
        the pattern string cannot be compiled.
        """
        if self._re:
            return self._re
        else:
            try:
                return re.compile(self.pattern, self.flags)
            except re.error as err:
                raise ValueError(
                    "invalid pattern ({!r}) passed to Regex".format(self.pattern)
                ) from err

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """``True`` if the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self):
        """Return ``Re:(<pattern repr>)`` with doubled backslashes collapsed."""
        return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

    def parseImpl(self, instring, loc, doActions=True):
        """Match at ``loc``; return the matched text with named groups
        attached as named results."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Match at ``loc``; return the tuple of the match's groups."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Match at ``loc``; return the match object itself."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``asGroupList=True``, or with a
        callable ``repl`` together with ``asMatch=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(asGroupList=True)")

        if self.asMatch and callable(repl):
            raise TypeError("cannot use sub() with a callable with Regex(asMatch=True)")

        if self.asMatch:

            def pa(tokens):
                # the token is a match object; expand the template on it
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3043 

3044 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to re_escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to re_escape an embedded quote
      string (such as SQL's ``""`` to re_escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    # (escaped literal, actual whitespace character) pairs
    ws_map = ((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r"))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """Validate the delimiters and compile the matching regex.

        Raises ``ValueError`` if either delimiter is empty after stripping
        whitespace, or if the generated pattern fails to compile.
        """
        super().__init__()
        # reconcile legacy camelCase keywords with their snake_case twins
        escChar = escChar or esc_char
        escQuote = escQuote or esc_quote
        unquoteResults = unquoteResults and unquote_results
        endQuoteChar = endQuoteChar or end_quote_char
        convertWhitespaceEscapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        opening = (quoteChar or quote_char).strip()

        # whitespace in quote chars is removed - it would not work anyway
        if not opening:
            raise ValueError("quote_char cannot be the empty string")

        if endQuoteChar is None:
            closing = opening
        else:
            closing = endQuoteChar.strip()
            if not closing:
                raise ValueError("endQuoteChar cannot be the empty string")

        self.quoteChar = opening
        self.quoteCharLen = len(opening)
        self.firstQuoteChar = opening[0]
        self.endQuoteChar = closing
        self.endQuoteCharLen = len(closing)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # alternatives allowed between the delimiters, in priority order
        alternatives = []

        if escQuote:
            alternatives.append(r"(?:{})".format(re.escape(escQuote)))

        if escChar:
            alternatives.append(r"(?:{}.)".format(re.escape(escChar)))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"

        if len(closing) > 1:
            # proper prefixes of the end quote that are not followed by
            # the remainder of the end quote, longest prefix first
            prefixes = "|".join(
                "(?:{}(?!{}))".format(
                    re.escape(closing[:i]),
                    re.escape(closing[i:]),
                )
                for i in range(len(closing) - 1, 0, -1)
            )
            alternatives.append("(?:" + prefixes + ")")

        end_first = _escape_regex_range_chars(closing[0])
        esc_in_class = _escape_regex_range_chars(escChar) if escChar is not None else ""
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            alternatives.append(r"(?:[^{}{}])".format(end_first, esc_in_class))
        else:
            self.flags = 0
            alternatives.append(r"(?:[^{}\n\r{}])".format(end_first, esc_in_class))

        self.pattern = (
            re.escape(opening)
            + "(?:"
            + "|".join(alternatives)
            + ")*"
            + re.escape(closing)
        )

        try:
            self.re = re.compile(self.pattern, self.flags)
        except re.error:
            raise ValueError(
                "invalid pattern {!r} passed to Regex".format(self.pattern)
            )
        else:
            self.reString = self.pattern
            self.re_match = self.re.match

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self):
        """Describe the quoted string by its delimiter(s)."""
        same_delims = self.quoteChar == self.endQuoteChar
        if same_delims and isinstance(self.quoteChar, str_type):
            return "string enclosed in {!r}".format(self.quoteChar)

        return "quoted string, starting with {} ending with {}".format(
            self.quoteChar, self.endQuoteChar
        )

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(end_loc, text)``; the text has its delimiters removed
        and escapes processed when ``unquoteResults`` is set.
        """
        # cheap first-character test before running the regex
        if instring[loc] == self.firstQuoteChar:
            found = self.re_match(instring, loc)
        else:
            found = None
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        end = found.end()
        text = found.group()

        if not self.unquoteResults:
            return end, text

        # strip off quotes
        text = text[self.quoteCharLen : -self.endQuoteCharLen]

        if isinstance(text, str_type):
            # replace escaped whitespace
            if "\\" in text and self.convertWhitespaceEscapes:
                for literal, actual in self.ws_map:
                    text = text.replace(literal, actual)

            # replace escaped characters
            if self.escChar:
                text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

            # replace escaped quotes
            if self.escQuote:
                text = text.replace(self.escQuote, self.endQuoteChar)

        return end, text

3239 

3240 

class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length. The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimited_list(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        """Store the excluded characters and the length limits.

        Raises ``ValueError`` if ``min`` is less than 1.
        """
        super().__init__()
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both limits
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self):
        """Return ``!W:(...)``, truncating long exclusion sets."""
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return "!W:({})".format(self.notChars)
        return "!W:({}...)".format(self.notChars[: 16 - 3])

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until an excluded one is hit.

        Returns ``(end_loc, matched_text)`` or raises ``ParseException``.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        pos = begin + 1
        while pos < limit and instring[pos] not in excluded:
            pos += 1

        if pos - begin < self.minLen:
            raise ParseException(instring, pos, self.errmsg, self)

        return pos, instring[begin:pos]

3319 

3320 

class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when generating this element's default name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        """Configure which whitespace characters are matched.

        Known whitespace characters that are *not* in ``ws`` become the
        characters this element skips before matching.
        """
        super().__init__()
        self.matchWhite = ws
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length overrides both limits
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self):
        """Concatenate the display names of the matched characters."""
        return "".join(White.whiteStrs[c] for c in self.matchWhite)

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of matching whitespace starting at ``loc``.

        Returns ``(end_loc, matched_text)`` or raises ``ParseException``.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        pos = begin + 1
        while pos < limit and instring[pos] in self.matchWhite:
            pos += 1

        if pos - begin < self.minLen:
            raise ParseException(instring, pos, self.errmsg, self)

        return pos, instring[begin:pos]

3396 

3397 

class PositionToken(Token):
    """Base class for the position-matching tokens defined below it.

    Sets ``mayReturnEmpty`` to ``True`` and ``mayIndexError`` to ``False``.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

3403 

3404 

class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorables and whitespace until the target column is reached."""
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column.

        Raises ``ParseException`` if ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        stop = loc + self.col - current
        return stop, instring[loc:stop]

3434 

3435 

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set before newline is taken out of it
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper element used only to skip non-newline whitespace
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Advance past whitespace (and newlines, if they were originally
        whitespace) ahead of the line-start test."""
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)
        if "\n" in self.orig_whiteChars:
            # slicing avoids an IndexError at the end of the string
            while instring[pos : pos + 1] == "\n":
                pos = self.skipper.preParse(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3481 

3482 

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # newline must not be skipped as whitespace, or it could never match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc``, or the end of the string.

        Returns ``(loc + 1, "\\n")`` for a newline and ``(loc + 1, [])`` at
        the end of the string; anything else raises ``ParseException``.
        """
        size = len(instring)
        if loc == size:
            return loc + 1, []
        if loc < size and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3504 

3505 

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed at position 0, or where only whitespace and ignorables
        precede ``loc``."""
        # a non-zero loc is accepted only if pre-parsing from 0 lands on it
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3521 

3522 

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed only when ``loc`` is at or beyond the end of ``instring``.

        Returns ``(loc + 1, [])`` exactly at the end and ``(loc, [])`` when
        already past it; raises ``ParseException`` if text remains.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end: already past the end, do not advance further
        return loc, []

3541 

3542 

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy keyword wins only when it was changed from the default
        chosen = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(chosen)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when a word begins at ``loc``.

        Position 0 always succeeds.
        """
        word_chars = self.wordChars
        if loc != 0 and (
            instring[loc - 1] in word_chars or instring[loc] not in word_chars
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3567 

3568 

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy keyword wins only when it was changed from the default
        chosen = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(chosen)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when a word ends at ``loc``.

        Any position at or beyond the end of the string succeeds.
        """
        word_chars = self.wordChars
        size = len(instring)
        # NOTE(review): at loc == 0, instring[loc - 1] reads the *last*
        # character of the string - confirm whether that is intended.
        if (
            size > 0
            and loc < size
            and (instring[loc] in word_chars or instring[loc - 1] not in word_chars)
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3594 

3595 

class ParseExpression(ParserElement):
    """Abstract base for parser elements that hold a list of
    sub-expressions (``self.exprs``) and combine or post-process the
    tokens those sub-expressions produce.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # any strings found in the sequence are wrapped as literals,
            # everything else is stored untouched
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in exprs
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all: treat it as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> Sequence[ParserElement]:
        """Return a shallow copy of the contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class; when ``recursive``
        is true, the contained expressions are replaced by copies and
        ``leave_whitespace`` is invoked on each copy.
        """
        super().leave_whitespace(recursive)

        if not recursive:
            return self

        copies = [e.copy() for e in self.exprs]
        self.exprs = copies
        for sub_expr in copies:
            sub_expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class; when ``recursive``
        is true, the contained expressions are replaced by copies and
        ``ignore_whitespace`` is invoked on each copy.
        """
        super().ignore_whitespace(recursive)

        if not recursive:
            return self

        copies = [e.copy() for e in self.exprs]
        self.exprs = copies
        for sub_expr in copies:
            sub_expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on every contained
        expression; a ``Suppress`` that is already registered is skipped.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        # propagate the expression the base class just recorded
        newest = self.ignoreExprs[-1]
        for sub_expr in self.exprs:
            sub_expr.ignore(newest)
        return self

    def _generateDefaultName(self):
        return "{}:({})".format(self.__class__.__name__, str(self.exprs))

    def streamline(self) -> ParserElement:
        """Streamline this expression and its children, flattening directly
        nested expressions of the same class where that is safe.
        """
        if self.streamlined:
            return self

        super().streamline()

        for sub_expr in self.exprs:
            sub_expr.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:

            def can_absorb(nested) -> bool:
                return (
                    isinstance(nested, self.__class__)
                    and not nested.parseAction
                    and nested.resultsName is None
                    and not nested.debug
                )

            head = self.exprs[0]
            if can_absorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            # re-read the tail: the list may just have been rebuilt above
            tail = self.exprs[-1]
            if can_absorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = "Expected " + str(self)

        return self

    def validate(self, validateTrace=None) -> None:
        """Validate every contained expression, then run the recursion
        check on this expression.
        """
        trace = [] if validateTrace is None else validateTrace
        tmp = trace[:] + [self]
        for sub_expr in self.exprs:
            sub_expr.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are themselves copies."""
        duplicate = super().copy()
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        # Optionally warn when a results name is put on a collection whose
        # members already carry results names of their own.
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for sub_expr in self.exprs:
                if not isinstance(sub_expr, ParserElement):
                    continue
                if not sub_expr.resultsName:
                    continue
                if diag in sub_expr.suppress_warnings_:
                    continue
                # warn from this frame so that ``stacklevel`` keeps pointing
                # at the same caller
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        sub_expr.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

    # camelCase aliases of the snake_case methods above
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace

3755 

3756 

class And(ParseExpression):
    """
    Matches only when every contained :class:`ParseExpression` is found,
    in the order given. Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: once ``And.parseImpl`` passes it, failures of
        later elements are raised as ``ParseSyntaxException``.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self):
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            # replace each ``...`` with a SkipTo targeting the expression
            # that follows it
            expanded = []
            last_index = len(exprs) - 1
            for position, item in enumerate(exprs):
                if item is not Ellipsis:
                    expanded.append(item)
                    continue
                if position >= last_index:
                    raise Exception(
                        "cannot construct And with sequence ending in ..."
                    )
                skipto_arg: ParserElement = (Empty() + exprs[position + 1]).exprs[-1]
                expanded.append(SkipTo(skipto_arg)("_skipped*"))
            exprs[:] = expanded
        super().__init__(exprs, savelist)

        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            leader = self.exprs[0]
            if isinstance(leader, White):
                self.skipWhitespace = False
            else:
                # whitespace handling follows the first element
                self.set_whitespace_chars(
                    leader.whiteChars,
                    copy_defaults=leader.copyDefaultWhiteChars,
                )
                self.skipWhitespace = leader.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        def ends_in_pending_skip(candidate) -> bool:
            return bool(
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_in_pending_skip(e) for e in self.exprs[:-1]):
            # iterate over a snapshot: slots of self.exprs are blanked
            # out while walking
            for index, element in enumerate(self.exprs[:-1]):
                if element is None or not ends_in_pending_skip(element):
                    continue
                element.exprs[-1] = element.exprs[-1] + self.exprs[index + 1]
                self.exprs[index + 1] = None
            self.exprs = [e for e in self.exprs if e is not None]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            visited = set()
            while cur:
                marker = id(cur)
                if marker in visited:
                    break
                visited.add(marker)
                if isinstance(cur, IndentedBlock):
                    # when ``prev`` matches, record its column on the block
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                children = cur.recurse()
                cur = next(iter(children), None)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        committed = False
        for element in self.exprs[1:]:
            # exact type test (not isinstance) identifies the '-' marker
            if type(element) is And._ErrorStop:
                committed = True
                continue

            if not committed:
                loc, exprtokens = element._parse(instring, loc, doActions)
            else:
                # past an error stop: any failure becomes a syntax error
                try:
                    loc, exprtokens = element._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )

            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        # extends this And in place rather than building And([self, other])
        return self.append(other)

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for element in self.exprs:
            element._checkRecursion(subRecCheckList)
            # stop at the first element that cannot match empty
            if not element.mayReturnEmpty:
                break

    def _generateDefaultName(self):
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "{" + inner + "}"

3909 

3910 

class Or(ParseExpression):
    """Matches when at least one contained :class:`ParseExpression` is
    found. If two expressions match, the expression that matches the
    longest string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self

        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and return the result of the longest match.

        Raises the furthest-located ``ParseFatalException`` if any
        alternative raised one, otherwise the furthest-located
        ``ParseException`` (with this expression's message), or a
        ``ParseException`` of its own when there was nothing to try.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: probe each alternative to learn how far it would match
        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parserElement = alt
                fatals.append(pfe)
                # a fatal error discards any ordinary failure seen so far
                furthest_exc = None
                furthest_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, alt))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, doActions)

            longest = -1, None
            for probed_loc, alt in candidates:
                if probed_loc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_loc, toks = alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                    continue

                if actual_loc >= probed_loc:
                    return actual_loc, toks
                # didn't match as much as before
                if actual_loc > longest[0]:
                    longest = actual_loc, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the element with the longer
                    # string form
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement))))
            raise fatals[0]

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        # extends this Or in place rather than building Or([self, other])
        return self.append(other)

    def _generateDefaultName(self):
        joined = " ^ ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

    def _setResultsName(self, name, listAllMatches=False):
        # Optionally warn when a results name is set on an alternation
        # that has an And among its alternatives.
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
        ):
            has_and_alternative = any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
            if has_and_alternative:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

4064 

4065 

class MatchFirst(ParseExpression):
    """Matches when at least one contained :class:`ParseExpression` is
    found. If more than one expression matches, the first one listed is
    the one that will match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        A ``ParseFatalException`` from an alternative is re-raised at once.
        If every alternative fails, the failure located furthest into the
        input is raised with this expression's message.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(
                    instring,
                    loc,
                    doActions,
                )
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parserElement = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                # running off the end of the input counts as a failure
                # located at the end of the string
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        # extends this MatchFirst in place rather than building
        # MatchFirst([self, other])
        return self.append(other)

    def _generateDefaultName(self):
        joined = " | ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

    def _setResultsName(self, name, listAllMatches=False):
        # Optionally warn when a results name is set on an alternation
        # that has an And among its alternatives.
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
        ):
            has_and_alternative = any(
                isinstance(e, And) and diag not in e.suppress_warnings_
                for e in self.exprs
            )
            if has_and_alternative:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

4175 

4176 

class Each(ParseExpression):
    """Matches only when all given :class:`ParseExpression` s are found,
    in any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        # ``all`` of an empty sequence is True, which is also the value
        # wanted when there are no expressions
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are built on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self) -> ParserElement:
        super().streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order they occur.

        A first phase repeatedly probes the remaining expressions with
        ``try_parse`` to discover the order in which they match; a second
        phase re-parses them in that order to collect the results.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            wrapped_optionals = [e.expr for e in self.exprs if isinstance(e, Opt)]
            empty_capable = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = wrapped_optionals + empty_capable
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        probe_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        match_order = []

        # keep sweeping until a whole sweep matches nothing
        while True:
            sweep = pending_required + pending_optional + repeatables
            failed = []
            fatals = []
            for candidate in sweep:
                try:
                    probe_loc = candidate.try_parse(
                        instring, probe_loc, raise_fatal=True
                    )
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parserElement = candidate
                    fatals.append(pfe)
                    failed.append(candidate)
                except ParseException:
                    failed.append(candidate)
                else:
                    # record the Opt wrapper when the match was its inner expr
                    match_order.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            if len(failed) == len(sweep):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the element with the longer
                    # string form
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                "Missing one or more required elements ({})".format(missing),
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        # second phase: parse for real, in the discovered order
        total_results = ParseResults([])
        for matched in match_order:
            loc, results = matched._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self):
        joined = " & ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

4342 

4343 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement` that wraps a single
    contained expression (``self.expr``, which may be ``None``) in order to
    combine or post-process its parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a plain string to an expression
            if issubclass(self._literalStringClass, Token):
                expr = self._literalStringClass(expr)
            elif issubclass(type(self), self._literalStringClass):
                # this class is itself the literal-string class: use a plain
                # Literal rather than instantiating the class again
                expr = Literal(expr)
            else:
                expr = self._literalStringClass(Literal(expr))
        self.expr = expr
        if expr is not None:
            # inherit matching characteristics from the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.set_whitespace_chars(
                expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
            )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> Sequence[ParserElement]:
        """Return the wrapped expression in a list, or ``[]`` if there is none."""
        return [self.expr] if self.expr is not None else []

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate parsing to the wrapped expression.

        Raises:
            ParseException: if no expression has been defined.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException(instring, loc, "No expression defined", self)

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Extends ``leave_whitespace`` defined in base class; when
        ``recursive`` is true and an expression is defined, the wrapped
        expression is replaced by a copy on which ``leave_whitespace`` is
        also invoked.
        """
        super().leave_whitespace(recursive)

        # test for None *before* copying: calling ``.copy()`` on a missing
        # expression would raise AttributeError
        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Extends ``ignore_whitespace`` defined in base class; when
        ``recursive`` is true and an expression is defined, the wrapped
        expression is replaced by a copy on which ``ignore_whitespace`` is
        also invoked.
        """
        super().ignore_whitespace(recursive)

        # test for None *before* copying: calling ``.copy()`` on a missing
        # expression would raise AttributeError
        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable here and on the wrapped
        expression; a ``Suppress`` that is already registered is skipped.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super().ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and the wrapped expression, if any."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        # meeting ourselves again on the current path means the grammar is
        # recursive
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None) -> None:
        """Validate the wrapped expression, then run the recursion check
        on this element.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self):
        return "{}:({})".format(self.__class__.__name__, str(self.expr))

    # camelCase aliases of the snake_case methods above
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace

4435 

4436 

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Zero-width element that matches only at column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width element that matches only at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column an optional trailing un-indent is matched against
        # (see parseImpl)
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        # NOTE(review): doActions lands in try_parse's third positional slot,
        # which other call sites address as ``raise_fatal=`` - confirm intent.
        self.expr.try_parse(instring, anchor_loc, doActions)

        indent_col = col(anchor_loc, instring)

        # one block line: wrapped expression positioned at the anchor column
        line_expr = Empty() + self._Indent(indent_col) + self.expr
        if self._recursive:
            deeper = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")

        body = OneOrMore(line_expr)
        if self._grouped:
            body = Group(body)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, doActions
        )

4499 

4500 

class AtStringStart(ParseElementEnhance):
    """Matches the wrapped expression only at the very beginning of the
    parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        # only location 0 qualifies as the string start
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")

4520 

4521 

class AtLineStart(ParseElementEnhance):
    r"""Matches the wrapped expression only when it begins in the first
    column of a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + restOfLine).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        # column 1 is the first column of a line
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")

4553 

4554 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # Parse with the wrapped expression, then empty the token list of the
        # returned ParseResults in place; any named results defined in the
        # lookahead expression stay attached to that object.
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead[:]

        # hand back the *original* location: lookahead consumes nothing
        return loc, lookahead

4589 

4590 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - expr - expression that must match prior to the current parse
      location
    - retreat - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        # work on a call-produced copy of the wrapped expression so that
        # switching off whitespace skipping does not touch the caller's object
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # ``exact`` means the lookbehind distance is derived from the
        # expression itself, so a single parse attempt at ``loc - retreat``
        # is made instead of scanning a window
        self.exact = False
        if isinstance(expr, str_type):
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # parse action that empties the token list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Return ``(loc, results)`` if the wrapped expression matches the
        text ending at ``loc``; raise ``ParseException`` (or the last
        ``ParseBaseException`` seen while scanning) otherwise.

        ``loc`` is returned unchanged: a lookbehind consumes no input.
        """
        if self.exact:
            if loc < self.retreat:
                # not enough text before loc for the expression to fit
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat) : loc]
            last_expr = ParseException(instring, loc, self.errmsg)
            # Try start positions from 1 char back up to the whole window.
            # The window holds min(loc, retreat) characters, so the offset
            # must not exceed that: one step further would give a start
            # index of -1, which wraps around to the end of the slice and
            # can produce a bogus match.
            for offset in range(1, min(loc, self.retreat) + 1):
                try:
                    _, ret = test_expr._parse(
                        instring_slice, len(instring_slice) - offset
                    )
                except ParseBaseException as pbe:
                    last_expr = pbe
                else:
                    break
            else:
                # no start position inside the window matched
                raise last_expr
        return loc, ret

4670 

4671 

class Located(ParseElementEnhance):
    """
    Wraps the tokens matched by an expression together with the locations
    in the input string where the match starts and ends.

    The result carries these results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression at ``loc`` and return
        ``[start, tokens, end]`` with the three results names attached."""
        begin = loc
        end, matched = self.expr._parse(instring, begin, doActions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        for key, value in (
            ("locn_start", begin),
            ("value", matched),
            ("locn_end", end),
        ):
            located[key] = value

        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]

4712 

4713 

class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.

    ``NotAny`` succeeds only when the wrapped expression does *not* match
    at the current position, and it never advances the parsing position
    within the input string. ``NotAny`` also does *not* skip over leading
    whitespace, and it always returns a null token list. May be
    constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # Set the flag directly on this element only; calling
        # self.leave_whitespace() is deliberately avoided because that
        # would propagate the setting to the contained expressions.
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc, [])`` when the wrapped expression cannot parse at
        ``loc``; raise ``ParseException`` when it can."""
        if not self.expr.can_parse_next(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self):
        return "".join(("~{", str(self.expr), "}"))

4755 

4756 

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition expressions: matches the
    wrapped expression once and then as many more times as possible,
    optionally stopping ahead of a sentinel expression."""

    def __init__(
        self,
        expr: ParserElement,
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        # the camelCase keyword wins when both spellings are supplied
        sentinel = stopOn or stop_on
        self.saveAsList = True
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the terminating sentinel; returns
        ``self``."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated: attempting to parse it fails exactly where the
        # sentinel matches
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression one or more times starting at
        ``loc`` and return ``(end_loc, accumulated_tokens)``.

        A failure of the first match (or a sentinel hit at ``loc``)
        propagates to the caller; a failure on any later repetition simply
        ends the loop.
        """
        parse_once = self.expr._parse
        skip_ignorables = self._skipIgnorables
        # None when no sentinel is configured, otherwise a callable that
        # raises when the sentinel is found at the given location
        reject_sentinel = (
            self.not_ender.tryParse if self.not_ender is not None else None
        )

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_once(instring, loc, doActions)

        has_ignorables = bool(self.ignoreExprs)
        try:
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                next_start = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_once(instring, next_start, doActions)
                if more_tokens or more_tokens.haskeys():
                    tokens += more_tokens
        except (ParseException, IndexError):
            # no further repetition possible - keep what was matched so far
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        warning_enabled = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        )
        if warning_enabled:
            for e in [self.expr] + self.expr.recurse():
                if not (isinstance(e, ParserElement) and e.resultsName):
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in e.suppress_warnings_
                ):
                    continue
                # issued from this frame so that stacklevel=3 keeps its meaning
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        e.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

4833 

4834 

class OneOrMore(_MultipleMatch):
    """
    Matches the given expression one or more times in a row.

    Parameters:
    - expr - expression that must match one or more times
    - stop_on - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self):
        # rendered as ``{expr}...``
        return "".join(("{", str(self.expr), "}..."))

4864 

4865 

class ZeroOrMore(_MultipleMatch):
    """
    Matches the given expression any number of times, including not at all.

    Parameters:
    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: ParserElement,
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Like the one-or-more parse, except that failing to match even
        once yields an empty result at the unchanged ``loc``."""
        try:
            outcome = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self):
        # rendered as ``[expr]...``
        return "".join(("[", str(self.expr), "]..."))

4897 

4898 

4899class _NullToken: 

4900 def __bool__(self): 

4901 return False 

4902 

4903 def __str__(self): 

4904 return "" 

4905 

4906 

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default supplied"; compared by identity so that
    # any real value, including ``None``, can be used as a default
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression at ``loc``; when it does not match,
        return the configured default (or no tokens at all) at the
        unchanged ``loc``."""
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif inner.resultsName:
                # keep the default reachable under the inner results name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self):
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "".join(("[", inner, "]"))


# alias: ``Optional`` is bound to the very same class object as ``Opt``
Optional = Opt

4981 

4982 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression;
      a plain string is converted to a literal expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        # the camelCase keyword wins when both spellings are supplied
        failOn = failOn or fail_on
        # accept a plain string for ``ignore`` the same way ``fail_on`` does;
        # parseImpl calls a method on it, which a raw str does not have
        if isinstance(ignore, str_type):
            ignore = self._literalStringClass(ignore)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` for the target expression and return
        ``(new_loc, results)`` where ``results`` holds the skipped text
        (plus the target's tokens when ``include`` was requested).

        Raises ``ParseException`` if the end of ``instring`` is reached
        without the target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        self_ignoreExpr_tryParse = (
            self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
        )

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this ``break`` bypasses the loop's ``else``
                # clause, so the scan *stops* here and the failure only
                # surfaces if whatever is parsed next does not match at this
                # position - confirm this matches the documented contract.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: actions are not run while searching
                self_expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honouring doActions
            loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5115 

5116 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # remember where this Forward was created; used by __del__ to point
        # its warning at the defining line
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)
        # stack entry of the most recent '<<' call, compared in __or__
        self.lshift_line = None

    def __lshift__(self, other):
        """Attach ``other`` as the real expression and copy its parsing
        attributes onto this ``Forward``; returns ``self``."""
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        self.expr = other
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        self.lshift_line = traceback.extract_stack(limit=2)[-2]
        return self

    def __ilshift__(self, other):
        return self << other

    def __or__(self, other):
        # '|' on the same source line as the preceding '<<' is the
        # ``fwd << a | b`` precedence mistake described in the class docstring
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # __lshift__ removes ``caller_frame`` before it touches the new
            # expression, so it may be gone even though ``expr`` is still
            # None; without a frame there is nothing to point the warning at
            caller_frame = getattr(self, "caller_frame", None)
            if caller_frame is None:
                return
            warnings.warn_explicit(
                "Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=caller_frame.filename,
                lineno=caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, doActions=True):
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = [
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            ]
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, doActions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `doActions=False` and only run `doActions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, doActions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `doActions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if doActions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match – do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if doActions:
                            # replace the match for doActions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, prev_result.copy()
                        del memo[peek_key]
                        return prev_loc, prev_peek.copy()
                    # the match did get better: see if we can improve further
                    else:
                        if doActions:
                            try:
                                memo[act_key] = super().parseImpl(instring, loc, True)
                            except ParseException as e:
                                memo[peek_key] = memo[act_key] = (new_loc, e)
                                raise
                        prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        if not self.streamlined:
            # mark first, so a grammar that refers back to this Forward
            # does not streamline it again
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self):
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        retString = "..."
        try:
            if self.expr is not None:
                retString = str(self.expr)[:1000]
            else:
                retString = "None"
        except Exception:
            # Building the inner name failed: fall back to the "..."
            # placeholder. Unlike returning from a ``finally`` clause, this
            # lets KeyboardInterrupt/SystemExit propagate.
            pass
        return self.__class__.__name__ + ": " + retString

    def copy(self) -> ParserElement:
        if self.expr is not None:
            return super().copy()
        else:
            # nothing attached yet: hand out a new Forward that delegates to
            # this one, so it picks up whatever is attached later
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False):
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward
            not in self.suppress_warnings_
        ):
            if self.expr is None:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "that has no contained expression".format(
                        "warn_name_set_on_empty_Forward", name, type(self).__name__
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, list_all_matches)

    # camelCase aliases for the two snake_case methods above
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace

5361 

5362 

class TokenConverter(ParseElementEnhance):
    """
    Base class, derived from :class:`ParseElementEnhance`, for expressions
    that convert parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but not forwarded to the base class
        super().__init__(expr)
        self.saveAsList = False

5371 

5372 

class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the camelCase keyword, when given, overrides the positional one
        separator = join_string if joinString is None else joinString
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = separator
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        # adjacent mode calls ParserElement.ignore directly; otherwise the
        # regular super() chain is used
        register = ParserElement.ignore if self.adjacent else super().ignore
        if self.adjacent:
            register(self, other)
        else:
            register(other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Collapse ``tokenlist`` into a single joined string token while
        keeping whatever the emptied copy of ``tokenlist`` retains."""
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            # wrapped in a list when both a results name and keys are present
            return [combined]
        return combined

5428 

5429 

class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    useful for returning tokens of :class:`ZeroOrMore` and
    :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(delimited_list(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(delimited_list(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest ``tokenlist`` one level deeper, or convert it to a
        ``ParseResults.List`` when ``aslist`` was requested."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

5465 

5466 

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list whose
    entries can also be looked up like a dictionary.

    The first token of each entry is used as that entry's key. This is
    useful for scraping tabular reports where the first column can serve
    as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        # Set after the base initializer so these values are the final ones.
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty entry of ``tokenlist`` under its first token.

        The value stored for a key depends on the entry's shape:

        - one token: the empty string
        - two tokens, the second not a ParseResults: the second token
        - otherwise: a copy of the entry without its first token, reduced
          to its single remaining item when exactly one item is left and
          the copy is not a ParseResults that reports keys

        Raises TypeError when an entry cannot be copied.
        """
        for position, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their string form
                key = str(key).strip()

            if entry_len == 1:
                value = ""
            elif entry_len == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            # the entry's index in the token list is stored with the value
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

5552 

5553 

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`delimited_list`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` (a literal Ellipsis) becomes a pending skip whose
        # target is supplied later by ``+`` or ``-``. ``savelist`` is accepted
        # but not used by this constructor.
        wrapped = _PendingSkip(NoMatch()) if expr is ... else expr
        super().__init__(wrapped)

    def __add__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        # pending skip: suppress everything up to ``other``, then match it
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        # same as __add__, but combined with the ``-`` operator
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # whatever was matched, contribute no tokens to the results
        return []

    def suppress(self) -> ParserElement:
        # already suppressed - no extra wrapper is added
        return self

5607 

5608 

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    Each time the wrapped parse action is called, a line of the form
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written to ``sys.stderr``. When the parse action finishes, a
    ``"<<leaving ..."`` line is written that shows either the returned
    value or the exception that was raised; the exception is re-raised.

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*call_args):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        s, l, t = call_args[-3:]
        if len(call_args) > 3:
            # an extra leading argument is reported by its class name,
            # giving a "ClassName.method" style label
            label = call_args[0].__class__.__name__ + "." + label
        sys.stderr.write(
            ">>entering {}(line: {!r}, {}, {!r})\n".format(label, line(l, s), l, t)
        )
        try:
            ret = f(*call_args)
        except Exception as exc:
            sys.stderr.write("<<leaving {} (exception: {})\n".format(label, exc))
            raise
        else:
            sys.stderr.write("<<leaving {} (ret: {!r})\n".format(label, ret))
            return ret

    # report the traced function's name rather than the wrapper's
    z.__name__ = f.__name__
    return z

5654 

5655 

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Grammar pieces used by srange() to parse a regex-style "[...]" character set.

# backslash followed by one punctuation character -> that character
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).set_parse_action(
    lambda s, l, t: t[0][1]
)
# "\x##", "\X##" or "\0x##" -> the character with that hex code point.
# The digits are taken from after the x marker. str.lstrip(r"\0x") treats its
# argument as a character set, so it also removed leading zero digits ("\x00"
# became "") and left an uppercase "X" in place; both made int() raise.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# "\0##" -> the character with that octal code point (leading backslash dropped)
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one member of the set: an escape, or a single char other than "\" and "]"
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range; grouped so it can be told apart from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + "]"
)

5682 

5683 

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If parsing or expanding the input raises any exception, an empty
    string is returned instead.
    """

    def _expand(part):
        # A range arrives as a two-item ParseResults (first, last);
        # anything else is already a single character.
        if isinstance(part, ParseResults):
            first, last = ord(part[0]), ord(part[1])
            return "".join(map(chr, range(first, last + 1)))
        return part

    try:
        parsed = _reBracketExpr.parse_string(s)
        # only the "body" results are used to build the returned string
        return "".join(map(_expand, parsed.body))
    except Exception:
        return ""

5719 

5720 

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. Any additional args are
    forwarded to the given function after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (or after its class
    when ``func`` has no ``__name__``).

    Example (compare the last to the example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # callables without a __name__ are labelled with their class name
    fallback_name = func.__class__.__name__
    display_name = getattr(func, "__name__", fallback_name)

    def pa(s, l, t):
        return [func(token, *args) for token in t]

    pa.__name__ = display_name
    return pa

5765 

5766 

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Looks at the local variables of the calling frame and, for each one
    that is a :class:`ParserElement` without a custom name, names the
    element after the variable it is bound to.
    """
    caller_frame = sys._getframe().f_back
    for var_name, candidate in caller_frame.f_locals.items():
        # skip non-parser objects and elements that were already named
        if not isinstance(candidate, ParserElement) or candidate.customName:
            continue
        candidate.set_name(var_name)

5775 

5776 

# Quoted-string expressions. Each body regex matches the opening quote and
# then any run of: a character other than the quote, newline, carriage return
# or backslash; a doubled quote; or a backslash escape (backslash plus one
# non-"x" character, or "\x" plus hex digits). The closing quote is matched
# by the literal that follows, and Combine joins both parts into one token.
dbl_quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"')
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'")
).set_name("string enclosed in single quotes")

# either quoting style; the double-quoted alternative is listed first
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"')
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'")
).set_name("quotedString using single or double quotes")

# a quoted string with a leading "u" prefix
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets built from code points 0xc0-0xff (minus 0xd7 and 0xf7) and
# from 0xa1-0xbf plus 0xd7 and 0xf7, respectively
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (a comprehension keeps its loop variable out of the namespace being scanned)
_builtin_exprs: List[ParserElement] = [
    candidate for candidate in vars().values() if isinstance(candidate, ParserElement)
]

5801 

# backward compatibility names: camelCase aliases bound to the same objects
# as their snake_case counterparts

# parse-action helpers
tokenMap, conditionAsParseAction, nullDebugAction = (
    token_map,
    condition_as_parse_action,
    null_debug_action,
)

# quoted-string expressions
sglQuotedString, dblQuotedString, quotedString, unicodeString = (
    sgl_quoted_string,
    dbl_quoted_string,
    quoted_string,
    unicode_string,
)

# positional expressions
lineStart, lineEnd, stringStart, stringEnd = (
    line_start,
    line_end,
    string_start,
    string_end,
)

# debugging decorator
traceParseAction = trace_parse_action