Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 44%

2# core.py

5from collections import deque

6import os

7import typing

8from typing import (

9 Any,

10 Callable,

11 Generator,

12 List,

13 NamedTuple,

14 Sequence,

15 Set,

16 TextIO,

17 Tuple,

18 Union,

19 cast,

20)

21from abc import ABC, abstractmethod

22from enum import Enum

23import string

24import copy

25import warnings

26import re

27import sys

28from collections.abc import Iterable

29import traceback

30import types

31from operator import itemgetter

32from functools import wraps

33from threading import RLock

34from pathlib import Path

36from .util import (

37 _FifoCache,

38 _UnboundedCache,

39 __config_flags,

40 _collapse_string_to_ranges,

41 _escape_regex_range_chars,

42 _bslash,

43 _flatten,

44 LRUMemo as _LRUMemo,

45 UnboundedMemo as _UnboundedMemo,

46 replaced_by_pep8,

47)

48from .exceptions import *

49from .actions import *

50from .results import ParseResults, _ParseResultsWithOffset

51from .unicode import pyparsing_unicode

53_MAX_INT = sys.maxsize

54str_type: Tuple[type, ...] = (str, bytes)

56#

58#

59# Permission is hereby granted, free of charge, to any person obtaining

60# a copy of this software and associated documentation files (the

61# "Software"), to deal in the Software without restriction, including

62# without limitation the rights to use, copy, modify, merge, publish,

63# distribute, sublicense, and/or sell copies of the Software, and to

64# permit persons to whom the Software is furnished to do so, subject to

65# the following conditions:

66#

67# The above copyright notice and this permission notice shall be

68# included in all copies or substantial portions of the Software.

69#

70# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

71# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

72# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

73# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

74# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

75# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

76# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

77#

if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """Minimal stand-in for ``functools.cached_property`` on Python < 3.8.

        The wrapped function is called once per instance; its result is stored
        in the instance ``__dict__`` under the function's name, so later
        attribute lookups find the stored value directly.
        """

        def __init__(self, func):
            self._func = func

        def __get__(self, instance, owner=None):
            # accessed on the class itself: hand back the descriptor rather
            # than failing on ``None.__dict__``
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret

class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public (non-underscore) name defined in the class body so far
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    _fixed_names = """
        collect_all_And_tokens
        """.split()

114

115

class __diag__(__config_flags):
    """Container for the global diagnostic flags; every flag starts out ``False``."""

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # every public (non-underscore) name defined in the class body so far
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Call ``cls.enable`` for every flag whose name starts with ``warn``."""
        for name in cls._warning_names:
            cls.enable(name)

136

137

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - flag to enable warnings when ``'<<'`` is
      combined with ``'|'`` in a way that operator precedence makes error-prone
      (``fwd << a | b``); use ``'<<='`` instead
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`.
    All warnings can be enabled by calling :class:`enable_all_warnings`.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

170

171

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Enable a global pyparsing diagnostic flag (see :class:`Diagnostics`).

    The flag is looked up by the enum member's name.
    """
    __diag__.enable(diag_enum.name)

177

178

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Disable a global pyparsing diagnostic flag (see :class:`Diagnostics`).

    The flag is looked up by the enum member's name.
    """
    __diag__.disable(diag_enum.name)

184

185

def enable_all_warnings() -> None:
    """
    Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).
    """
    __diag__.enable_all_warnings()

191

192

# hide abstract class (only the concrete __compat__ and __diag__ stay visible)
del __config_flags

195

196

197def _should_enable_warnings(

198 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]

199) -> bool:

200 enable = bool(warn_env_var)

201 for warn_opt in cmd_line_warn_options:

202 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split(

203 ":"

204 )[:5]

205 if not w_action.lower().startswith("i") and (

206 not (w_message or w_category or w_module) or w_module == "pyparsing"

207 ):

208 enable = True

209 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""):

210 enable = False

211 return enable

212

213

# at import time, honor -W command line options and the
# PYPARSINGENABLEALLWARNINGS environment variable
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

218

219

220# build list of single arg builtins, that can be used as parse actions

221_single_arg_builtins = {

222 sum,

223 len,

224 sorted,

225 reversed,

226 list,

227 tuple,

228 set,

229 any,

230 all,

231 min,

232 max,

233}

234

_generatorType = types.GeneratorType

# type aliases used in signatures throughout this module
ParseImplReturnType = Tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]
# a parse action may accept 0 to 3 trailing arguments of (s, loc, toks)
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# same call shapes as ParseAction, but returning a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

256

257

# character-set constants for building grammars
alphas = string.ascii_uppercase + string.ascii_lowercase
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
printables = "".join(c for c in string.printable if c not in string.whitespace)

# set on the first call to _trim_arity and reused by every later call
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]

267

268

def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is always called with the full argument list and
    forwards only the trailing arguments ``func`` accepts. The wrapper first
    tries all arguments, then drops leading ones one at a time (up to
    ``max_limit``) for as long as the ``TypeError`` comes from the call line
    itself rather than from inside ``func``. Once a call succeeds the arity is
    fixed, and later ``TypeError`` exceptions propagate unchanged.

    Members of ``_single_arg_builtins`` skip the probing and are called with
    the last of the three arguments only.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    # (filename, lineno) of the innermost extracted frame; a match
                    # with the synthesized call line means the call itself failed
                    trim_arity_type_error = (
                        frame_summary[:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

324

325

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Function to convert a simple predicate function that returns ``True`` or ``False``
    into a parse action. Can be used in places when a parse action is required
    and :class:`ParserElement.add_condition` cannot be used (such as when adding a condition
    to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - define a custom message to be used in the raised exception
      (default: ``"failed user-defined condition"``)
    - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately;
      otherwise will raise :class:`ParseException`

    """
    msg = message if message is not None else "failed user-defined condition"
    exc_type = ParseFatalException if fatal else ParseException
    # normalize fn so it can always be called as fn(s, l, t)
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if not bool(fn(s, l, t)):
            raise exc_type(s, l, msg)

    return pa

352

353

def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """Default "try" debug action.

    Prints the expression and location about to be matched, then the source
    line with a caret under the current column. The message is prefixed with
    ``*`` when ``cache_hit`` is true.
    """
    cache_hit_str = "*" if cache_hit else ""
    print(
        (
            f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
            f"  {line(loc, instring)}\n"
            f"  {' ' * (col(loc, instring) - 1)}^"
        )
    )

365

366

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """Default "match" debug action.

    Prints the matched expression and its tokens as a list. The message is
    prefixed with ``*`` when ``cache_hit`` is true. ``instring``, ``startloc``
    and ``endloc`` are accepted but not used.
    """
    cache_hit_str = "*" if cache_hit else ""
    print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}")

377

378

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: "ParserElement",
    exc: Exception,
    cache_hit: bool = False,
):
    """Default "fail" debug action.

    Prints the failed expression with the type and text of the exception. The
    message is prefixed with ``*`` when ``cache_hit`` is true. ``instring`` and
    ``loc`` are accepted but not used.
    """
    cache_hit_str = "*" if cache_hit else ""
    print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}")

388

389

def null_debug_action(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""

392

393

394class ParserElement(ABC):

395 """Abstract base level parser element class."""

396

397 DEFAULT_WHITE_CHARS: str = " \n\t\r"

398 verbose_stacktrace: bool = False

399 _literalStringClass: type = None # type: ignore[assignment]

400

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Overrides the default whitespace chars

    Example::

        # default whitespace chars are space, <TAB> and newline
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # update whitespace all parse expressions defined in this module
    for expr in _builtin_exprs:
        # only expressions still tracking the default are updated
        if expr.copyDefaultWhiteChars:
            expr.whiteChars = set(chars)

421

@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Set class to be used for inclusion of string literals into a parser.

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    ParserElement._literalStringClass = cls

443

@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Yields a sequence of class(obj, **class_kwargs) for obj in seq.

    Example::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    """
    yield from (cls(obj, **class_kwargs) for obj in seq)

455

class DebugActions(NamedTuple):
    # callbacks invoked by the parse machinery when debugging is on;
    # a None entry means no callback for that event
    debug_try: typing.Optional[DebugStartAction]
    debug_match: typing.Optional[DebugSuccessAction]
    debug_fail: typing.Optional[DebugExceptionAction]

460

def __init__(self, savelist: bool = False):
    """Initialize the parse settings shared by all parser elements.

    ``savelist`` is stored as ``saveAsList``; every other attribute starts at
    the default assigned below.
    """
    self.parseAction: List[ParseAction] = list()
    self.failAction: typing.Optional[ParseFailAction] = None
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    self.saveAsList = savelist
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs: List["ParserElement"] = list()
    self.debug = False
    self.streamlined = False
    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""
    # mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True
    # custom debug actions
    self.debugActions = self.DebugActions(None, None, None)
    # avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
    self.suppress_warnings_: List[Diagnostics] = []

488

def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
    """
    Suppress warnings emitted for a particular diagnostic on this expression.

    Returns ``self`` so calls can be chained.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # statement would normally raise a warning, but is now suppressed
        print(base.parse_string("x"))

    """
    self.suppress_warnings_.append(warning_type)
    return self

504

def visit_all(self):
    """General-purpose method to yield all expressions and sub-expressions
    in a grammar. Typically just for internal use.

    Expressions are yielded breadth-first starting with ``self``, and each
    one is yielded at most once.
    """
    to_visit = deque([self])
    seen = set()
    while to_visit:
        cur = to_visit.popleft()

        # guard against looping forever through recursive grammars
        if cur in seen:
            continue
        seen.add(cur)

        to_visit.extend(cur.recurse())
        yield cur

521

def copy(self) -> "ParserElement":
    """
    Make a copy of this :class:`ParserElement`.  Useful for defining
    different parse actions for the same parsing pattern, using copies of
    the original parse element.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    cpy = copy.copy(self)
    # shallow copy shares the lists; give the copy its own
    cpy.parseAction = self.parseAction[:]
    cpy.ignoreExprs = self.ignoreExprs[:]
    # pick up the current class-wide default whitespace
    if self.copyDefaultWhiteChars:
        cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return cpy

550

def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> "ParserElement":
    """
    Define name for referencing matching tokens as a nested attribute
    of the returned parse results.

    Normally, results names are assigned as you would assign keys in a dict:
    any existing value is overwritten by later values. If it is necessary to
    keep all values captured for a particular results name, call ``set_results_name``
    with ``list_all_matches`` = True.

    NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    You can also set results names using the abbreviated syntax,
    ``expr("name")`` in place of ``expr.set_results_name("name")``
    - see :class:`__call__`. If ``list_all_matches`` is required, use
    ``expr("name*")``.

    Example::

        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # either spelling of the keyword turns the option on
    listAllMatches = listAllMatches or list_all_matches
    return self._setResultsName(name, listAllMatches)

583

584 def _setResultsName(self, name, listAllMatches=False):

585 if name is None:

586 return self

587 newself = self.copy()

588 if name.endswith("*"):

589 name = name[:-1]

590 listAllMatches = True

591 newself.resultsName = name

592 newself.modalResults = not listAllMatches

593 return newself

594

def set_break(self, break_flag: bool = True) -> "ParserElement":
    """
    Method to invoke the Python pdb debugger when this element is
    about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
    disable.
    """
    if break_flag:
        _parseMethod = self._parse

        def breaker(instring, loc, doActions=True, callPreParse=True):
            import pdb

            # this call to pdb.set_trace() is intentional, not a checkin error
            pdb.set_trace()
            return _parseMethod(instring, loc, doActions, callPreParse)

        # remember the wrapped method so that disabling can restore it
        breaker._originalParseMethod = _parseMethod  # type: ignore [attr-defined]
        self._parse = breaker  # type: ignore [assignment]
    else:
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod  # type: ignore [attr-defined, assignment]
    return self

617

def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Define one or more actions to perform when successfully matching parse element definition.

    Parse actions can be called to perform data conversions, do extra validation,
    update external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable method with 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s``    = the original string being parsed (see note below)
    - ``loc``  = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The parsed tokens are passed to the parse action as ParseResults. They can be
    modified in place using list-style append, extend, and pop operations to update
    the parsed list elements; and with dictionary-style item set and del operations
    to add, update, or remove any named results. If the tokens are modified in place,
    it is not necessary to return them with a return statement.

    Parse actions can also completely replace the given tokens, with another ``ParseResults``
    object, or with some entirely different object (common for parse actions that perform data
    conversions). A convenient way to build a new parse result is to define the values
    using a dict, and then create the return value using :class:`ParseResults.from_dict`.

    If None is passed as the ``fn`` parse action, all previously added parse actions for this
    expression are cleared.

    Optional keyword arguments:

    - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during
      lookaheads and alternate testing. For parse actions that have side effects, it is
      important to only call the parse action once it is determined that it is being
      called as part of a successful parse. For parse actions that perform additional
      validation, then call_during_try should be passed as True, so that the validation
      code is included in the preliminary "try" parses.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See :class:`parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Raises ``TypeError`` if any of ``fns`` is not callable.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    if list(fns) == [None]:
        # a single None clears the actions and leaves callDuringTry untouched
        self.parseAction = []
    else:
        if not all(callable(fn) for fn in fns):
            raise TypeError("parse actions must be callable")
        self.parseAction = [_trim_arity(fn) for fn in fns]
        # both the snake_case and the legacy camelCase keyword are accepted
        self.callDuringTry = kwargs.get(
            "call_during_try", kwargs.get("callDuringTry", False)
        )
    return self

703

def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.

    Once ``callDuringTry`` is true, later calls do not turn it back off.

    See examples in :class:`copy`.
    """
    self.parseAction += [_trim_arity(fn) for fn in fns]
    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self

715

def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    # Leave a missing message as None so condition_as_parse_action can apply
    # its default text; str(None) would produce the literal message "None".
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or kwargs.get(
        "call_during_try", kwargs.get("callDuringTry", False)
    )
    return self

752

def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
    """
    Define action to perform if parsing fails at this expression.
    Fail action fn is a callable function that takes the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value.  It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately."""
    self.failAction = fn
    return self

768

769 def _skipIgnorables(self, instring: str, loc: int) -> int:

770 if not self.ignoreExprs:

771 return loc

772 exprsFound = True

773 ignore_expr_fns = [e._parse for e in self.ignoreExprs]

774 last_loc = loc

775 while exprsFound:

776 exprsFound = False

777 for ignore_fn in ignore_expr_fns:

778 try:

779 while 1:

780 loc, dummy = ignore_fn(instring, loc)

781 exprsFound = True

782 except ParseException:

783 pass

784 # check if all ignore exprs matched but didn't actually advance the parse location

785 if loc == last_loc:

786 break

787 last_loc = loc

788 return loc

789

def preParse(self, instring: str, loc: int) -> int:
    """Return ``loc`` advanced past ignorable expressions and leading whitespace.

    Ignorables are skipped first (only when ``ignoreExprs`` is non-empty),
    then characters in ``whiteChars`` (only when ``skipWhitespace`` is set).
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if self.skipWhitespace:
        instrlen = len(instring)
        white_chars = self.whiteChars
        while loc < instrlen and instring[loc] in white_chars:
            loc += 1

    return loc

801

def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: match nothing, returning ``loc`` unchanged and no tokens."""
    return loc, []

804

def postParse(self, instring, loc, tokenlist):
    """Base implementation: return ``tokenlist`` unchanged."""
    return tokenlist

807

# @profile
def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """Parse ``instring`` at ``loc`` with this element, without any caching.

    Steps: optional ``preParse``, then ``parseImpl``, then ``postParse``, then
    wrapping the tokens in a :class:`ParseResults`, then running the parse
    actions (only when ``doActions`` or ``callDuringTry`` is set). Debug
    actions and the fail action are invoked when configured.

    Returns ``(new_loc, ParseResults)``. An ``IndexError`` from ``parseImpl``
    is turned into a ``ParseException`` at the end of the input, but only on
    the path guarded by ``mayIndexError`` or a location at/after the end. An
    ``IndexError`` from a parse action is re-raised as a ``ParseException``
    chained to the original.
    """
    debugging = self.debug  # and doActions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        # fallback so the failure handlers below have a location to report
        # even if preParse itself raises
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # fast path: no debug or fail callbacks to service
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action returning a new value replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a parse action returning a new value replaces the results
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

910

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match of this expression at ``loc`` and report where it ends.

    :param instring: the string being parsed
    :param loc: location at which the match is attempted
    :param raise_fatal: if ``True``, a :class:`ParseFatalException` raised while
        matching propagates unchanged; otherwise it is replaced by a plain
        :class:`ParseException` built from this expression's ``errmsg``
    :param do_actions: forwarded to ``_parse`` as its ``doActions`` argument
    :returns: the first item of the pair returned by ``_parse``
    :raises ParseException: when a fatal exception is downgraded as described above
    """
    try:
        match = self._parse(instring, loc, doActions=do_actions)
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return match[0]

925

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches ``instring`` at ``loc``.

    Delegates to :meth:`try_parse`; a :class:`ParseException` or ``IndexError``
    raised there is reported as ``False``, a normal return as ``True``.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        matched = False
    else:
        matched = True
    return matched

933

# cache for left-recursion in Forward references
# NOTE(review): recursion_lock is not used in this part of the file; presumably it
# guards recursion_memos inside Forward - confirm against that class.
recursion_lock = RLock()
# Emptied by reset_cache() and replaced by a memo object in enable_left_recursion().
# Per the annotation, keys are (int, Forward, bool) and values pair an int with
# either a ParseResults or an Exception.
recursion_memos: typing.Dict[
    Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
] = {}

939

class _CacheType(dict):
    """
    class to help type checking

    Declares the members that ``_parseCache`` uses on the packrat cache object:
    the ``not_in_cache`` attribute and the ``get`` / ``set`` methods.  The methods
    defined here are empty stubs.
    """

    # compared by identity against the result of get() in _parseCache
    not_in_cache: bool

    def get(self, *args):
        # stub with no body; returns None
        ...

    def set(self, *args):
        # stub with no body; returns None
        ...

952

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = (
    _CacheType()
)  # set later by enable_packrat(); this is here so that reset_cache() doesn't fail
# held by _parseCache() around the whole lookup / parse / store sequence
packrat_cache_lock = RLock()
# [hits, misses] counters, incremented by _parseCache() and zeroed by reset_cache()
packrat_cache_stats = [0, 0]

959

960 # this method gets repeatedly called during backtracking with the same arguments -

961 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Memoizing wrapper around ``_parseNoCache``, installed as ``_parse`` by
    ``enable_packrat()``.

    Results are keyed on ``(self, instring, loc, callPreParse, doActions)``.
    On a miss the expression is parsed and either the result, stored as
    ``(end location, copy of tokens, loc)``, or a traceback-free copy of the
    raised :class:`ParseBaseException` is cached.  On a hit the cached
    exception is re-raised, or the cached end location and a fresh copy of
    the tokens are returned.  The whole operation runs while holding
    ``ParserElement.packrat_cache_lock``.

    When ``self.debug`` is set, the debug hooks are invoked on cache hits with
    ``cache_hit=True``; a ``TypeError`` raised by such a call is ignored.

    :returns: ``(end location, tokens)``
    :raises ParseBaseException: from the underlying parse, or replayed from the cache
    """
    # indexes into ParserElement.packrat_cache_stats
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store a copy of the tokens so later changes to the returned
                # results do not alter the cached entry
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    # NOTE(review): presumably tolerates hooks that do not accept
                    # cache_hit; this also hides TypeErrors raised inside the hook
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(Tuple[int, ParseResults, int], value)
            # cached entries are (end location, tokens, lookup location)
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # pass start before end, matching the order used by
                    # _parseNoCache; start_loc is the lookup location, i.e. the
                    # position before any pre-parse skipping
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result

1010

# parse implementation currently in use: enable_packrat() rebinds this to
# _parseCache, disable_memoization() restores _parseNoCache
_parse = _parseNoCache

1012

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache and the left-recursion memo table, and set every
    packrat hit/miss counter back to zero (the counter list is updated in place).
    """
    stats = ParserElement.packrat_cache_stats
    ParserElement.packrat_cache.clear()
    stats[:] = [0 for _ in stats]
    ParserElement.recursion_memos.clear()

1020

# which memoization mode is active; set by enable_packrat() /
# enable_left_recursion(), both cleared by disable_memoization()
_packratEnabled = False
_left_recursion_enabled = False

1023

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat and Left Recursion parsing together with their memoization.

    The caches are reset, both mode flags are cleared and the non-caching parse
    routine is reinstated.  Calling this when neither mode is active is harmless,
    so it may be used before enabling Packrat or Left Recursion to clear any
    previous settings.
    """
    ParserElement.reset_cache()
    for flag in ("_left_recursion_enabled", "_packratEnabled"):
        setattr(ParserElement, flag, False)
    ParserElement._parse = ParserElement._parseNoCache

1037

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Switch on "bounded recursion" parsing, which supports direct and indirect
    left-recursion. While parsing, left-recursive :class:`Forward` elements are
    matched repeatedly with a fixed recursion depth that is raised step by step
    until the longest match is found.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    The recursion search memoizes matches of ``Forward`` elements, so parse
    actions may not be re-run during backtracking. Programs whose parse actions
    depend on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - upper bound on the number of
      memoized ``Forward`` elements; ``None`` memoizes all of them. Values that
      are not greater than zero raise ``NotImplementedError``.
    - ``force`` - (default=``False``) - call ``disable_memoization()`` first,
      discarding any previous, conflicting settings.

    Bounded Recursion parsing is similar but not identical to Packrat parsing,
    so the two cannot be combined: unless ``force=True`` is given, a
    ``RuntimeError`` is raised when Packrat parsing is already enabled.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError("Memo size of %s" % cache_size_limit)

    ParserElement.recursion_memos = memo  # type: ignore[assignment]
    ParserElement._left_recursion_enabled = True

1085

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Switch on "packrat" parsing, which memoizes the parsing logic.
    Repeated parse attempts at the same string location (common in complex
    grammars) return a cached value immediately instead of re-running the
    parsing/validating code. Both successful results and parsing exceptions
    are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - an integer limits the size
      of the packrat cache; ``None`` makes the cache unbounded; ``0``
      effectively disables the cache.
    - ``force`` - (default= ``False``) - call ``disable_memoization()`` first,
      discarding any previous, conflicting settings.

    This speedup may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is first
    imported. To activate it, call the static method
    :class:`ParserElement.enable_packrat`, ideally immediately after
    importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar but not identical to Bounded Recursion parsing,
    so the two cannot be combined: unless ``force=True`` is given, a
    ``RuntimeError`` is raised when Bounded Recursion is already enabled.
    If packrat parsing is already active (and ``force`` is not set) the call
    leaves the existing cache untouched.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    ParserElement.packrat_cache = (
        _UnboundedCache()
        if cache_size_limit is None
        else _FifoCache(cache_size_limit)  # type: ignore[assignment]
    )
    ParserElement._parse = ParserElement._parseCache

1131

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string according to this parser definition. This is the main entry
    point for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the whole input matches the grammar, set ``parse_all`` to ``True``; this is
    equivalent to ending the grammar with :class:`StringEnd`\\ ().

    So that column numbers are reported properly, ``parse_string`` works on a copy of the input in which
    every tab has been expanded to spaces (8 spaces per tab, as per the default in ``string.expandtabs``).
    If the input contains tabs and parse actions use their ``loc`` argument to index into the string being
    parsed, keep the two views consistent by doing one of the following:

    - call ``parse_with_tabs`` on the grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - give the parse action the full ``(s,loc,toks)`` signature and index into its ``s`` argument, or
    - expand the tabs in the input string yourself before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    Parsing behavior depends on the concrete subclass of this abstract class; see the subclasses
    for further examples.

    An exception is raised when ``parse_all`` is set and ``instring`` does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    must_consume_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if must_consume_all:
            # skip trailing ignorable text, then insist on end of input
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, end_loc)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise
    return tokens

1200

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match yields the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    :param instring: the string to scan
    :param max_matches: stop after this many matches
    :param overlap: if ``True``, allow matches to overlap
    :param debug: if ``True``, print a dict with ``tokens``, ``start`` and ``end``
        for every match before it is yielded
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of the
        two values is used

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every loop iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # if pre-parsing from the old location skips anything,
                        # resume at the end of the match; otherwise step forward
                        # a single character so overlapping matches can be found
                        skipped_to = preparseFn(instring, loc)
                        if skipped_to > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc: step forward to avoid looping
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)

1291

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text using the
    (possibly modified) tokens returned from parse actions. Define a grammar,
    attach a parse action that modifies the returned token list, then call
    ``transform_string()`` on a target string: it scans for matches, replaces
    each matched span with the match's tokens, and returns the resulting string.

    Note that this sets ``keepTabs`` to ``True`` on the expression.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for tokens, start, end in self.scan_string(instring, debug=debug):
            # untouched text between the previous match and this one
            pieces.append(instring[prev_end:start])
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces += tokens.as_list()
                elif isinstance(tokens, Iterable) and not isinstance(tokens, str_type):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            prev_end = end
        pieces.append(instring[prev_end:])
        kept = [piece for piece in pieces if piece]
        return "".join([str(item) for item in _flatten(kept)])
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1337

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens of
    every match of this expression into one :class:`ParseResults`. The optional
    ``max_matches`` argument clips the search after 'n' matches; ``maxMatches``
    is the pre-PEP8 spelling and the smaller of the two values is used.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            tokens for tokens, _, _ in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1377

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator that splits a string, treating matches of this expression as the
    separators. ``maxsplit`` limits the number of splits; when
    ``include_separators`` (default= ``False``) is true, the first token of each
    separator match is yielded between the pieces. ``includeSeparators`` is the
    pre-PEP8 spelling of the same flag.

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    keep_separators = includeSeparators or include_separators
    cursor = 0
    for tokens, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if keep_separators:
            yield tokens[0]
        cursor = end
    # whatever follows the final separator (possibly the empty string)
    yield instring[cursor:]

1409

def __add__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator - returns :class:`And`. A string operand is
    first converted with this expression's ``_literalStringClass``
    (:class:`Literal` by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`::

        Literal('start') + ... + Literal('end')

    is equivalent to::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    The skipped text is returned under the results name '_skipped'; so that a
    parser may contain several skips, the value is a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1445

def __radd__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` becomes a :class:`SkipTo` for ``expr`` (named ``"_skipped*"``)
    followed by ``expr``; a string operand is converted with
    ``_literalStringClass``; any other non-parser operand yields ``NotImplemented``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1458

def __sub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop.

    A string operand is converted with ``_literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1468

def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass`` and the
    subtraction is then delegated to the converted operand.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1478

def __mul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator, so that ``expr * 3`` may be written in
    place of ``expr + expr + expr``. The multiplier may also be a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions, and
    the tuple may contain ``None`` (or ``...``):

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises ``ValueError`` for a negative count or for a maximum smaller than
    the minimum; returns ``NotImplemented`` for unsupported multiplier types.
    """
    # rewrite the Ellipsis shorthands as (min, max) tuples
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        required, optional = other, 0
    elif isinstance(other, tuple):
        # pad with None and clip so that exactly two bounds remain
        bounds = tuple(None if item is Ellipsis else item for item in other)
        lower, upper = (bounds + (None, None))[:2]
        if lower is None:
            lower = 0
        if not isinstance(lower, int):
            return NotImplemented
        if upper is None:
            # open-ended repetition
            if lower == 0:
                return ZeroOrMore(self)
            if lower == 1:
                return OneOrMore(self)
            return self * lower + ZeroOrMore(self)
        if not isinstance(upper, int):
            return NotImplemented
        required, optional = lower, upper - lower
    else:
        return NotImplemented

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == 0 and optional == 0:
        return And([])

    def nested_optionals(count):
        # Opt(self + Opt(self + ... Opt(self))) nested ``count`` levels deep
        if count > 1:
            return Opt(self + nested_optionals(count - 1))
        return Opt(self)

    if not optional:
        return self if required == 1 else And([self] * required)
    if not required:
        return nested_optionals(optional)
    if required == 1:
        return self + nested_optionals(optional)
    return And([self] * required) + nested_optionals(optional)

1557

def __rmul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator when left operand is not a
    :class:`ParserElement`; simply forwards to :meth:`__mul__`.
    """
    product = self.__mul__(other)
    return product

1560

def __or__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    Special operands: ``...`` gives a pending skip that must skip, and the
    empty string makes this expression optional. Other strings are converted
    with ``_literalStringClass``; any other non-parser operand yields
    ``NotImplemented``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented

1576

def __ror__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass`` and the
    operation is then delegated to the converted operand.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1586

def __xor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator - returns :class:`Or`

    A string operand is converted with ``_literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1596

def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass`` and the
    operation is then delegated to the converted operand.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1606

def __and__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator - returns :class:`Each`

    A string operand is converted with ``_literalStringClass``; any other
    non-parser operand yields ``NotImplemented``.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1616

def __rand__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass`` and the
    operation is then delegated to the converted operand.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1626

def __invert__(self) -> "ParserElement":
    """
    Implementation of ``~`` operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1632

# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence
# (with __iter__ set to None, iter(expr) raises TypeError instead of falling back
# to calling __getitem__ with successive integers)
__iter__ = None

1636

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` when more than two index arguments are given.
    """

    stop_on_defined = False
    # placeholder; only passed to stopOn() when a slice supplied a stop expression
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[spec : stop] - slice start is the repetition spec, slice stop the
        # stop_on expression; a missing start is treated as ``...``
        key, stop_on = key.start, key.stop
        if key is None:
            key = ...
        stop_on_defined = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[m, n : stop] - the slice is read from the second tuple position
        key, stop_on = (key[0], key[1].start), key[1].stop
        stop_on_defined = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # non-iterable key such as an int: expr[n] becomes expr*(n, n)
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    ret = self * tuple(key[:2])
    # typing aid only; no runtime check is performed here
    ret = typing.cast(_MultipleMatch, ret)

    if stop_on_defined:
        # NOTE(review): assumes the product exposes stopOn(); confirm for
        # multipliers whose result is not a ZeroOrMore/OneOrMore
        ret.stopOn(stop_on)

    return ret

1696

def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    A ``name`` ending in ``'*'`` requests ``list_all_matches=True``; the name is
    handed as given to ``_setResultsName``.

    Without a ``name``, this is the same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)

1716

def suppress(self) -> "ParserElement":
    """
    Wrap this :class:`ParserElement` in :class:`Suppress` so that its output is
    omitted; handy for keeping punctuation out of the returned results.
    """
    suppressed = Suppress(self)
    return suppressed

1723

def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn on skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern, and return ``self``.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any).
        This implementation does not use the flag itself.
    """
    self.skipWhitespace = True
    return self

1733

def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Turn off skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern, and return ``self``. Normally used
    only inside the pyparsing module, but some whitespace-sensitive grammars
    may need it.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any).
        This implementation does not use the flag itself.
    """
    self.skipWhitespace = False
    return self

1744

def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Replace the whitespace characters of this expression and return ``self``.

    :param chars: characters to treat as whitespace; stored as a ``set``
    :param copy_defaults: stored as ``copyDefaultWhiteChars``

    Whitespace skipping is switched on as part of this call.
    """
    self.whiteChars = set(chars)
    self.copyDefaultWhiteChars = copy_defaults
    self.skipWhitespace = True
    return self

1755

def parse_with_tabs(self) -> "ParserElement":
    """
    Keep ``<TAB>`` characters in the input instead of applying the default
    behavior of expanding them to spaces before parsing, and return ``self``.
    Must be called before ``parse_string`` when the input grammar contains
    elements that match ``<TAB>`` characters.
    """
    self.keepTabs = True
    return self

1764

def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in :class:`Suppress`. A :class:`Suppress` expression is
    added as-is unless it is already registered; any other expression is copied
    and the copy is wrapped in :class:`Suppress` before being added.

    :param other: the expression (or string) to ignore
    :returns: ``self``

    Example::

        patt = Word(alphas)[1, ...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if isinstance(other, Suppress):
        # avoid registering the same Suppress expression twice
        if other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
    else:
        self.ignoreExprs.append(Suppress(other.copy()))
    return self

1792

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Install custom callbacks for the debugging messages produced during pattern
    matching, switch debugging on for this expression, and return ``self``.
    A falsy callback is replaced by the corresponding default action.

    - ``start_action`` - called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - called when an expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_failure = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_failure)
    self.debug = True
    return self

1818

def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement":
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on every expression
    yielded by ``visit_all()``.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if recurse:
        # Delegate to each visited expression individually (non-recursively).
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # Enabling debug installs the default actions, which also sets self.debug.
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

1871

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    Computed on first access by ``_generateDefaultName()`` and cached in
    ``_defaultName`` until that attribute is reset to ``None``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1877

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Build the value exposed through ``default_name``.

    Abstract: every child class must provide its own implementation.
    """

1883

def set_name(self, name: str) -> "ParserElement":
    """
    Define name for this expression, makes debugging and exception messages clearer.

    Stores ``name`` as the custom name, rebuilds ``errmsg`` from the resulting
    ``self.name``, and enables debugging when the
    ``enable_debug_on_named_expressions`` diagnostic flag is set.

    Returns ``self`` so that calls can be chained.

    Example::

        Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
        Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.customName = name
    # self.name falls back to the default name when ``name`` is None.
    self.errmsg = "Expected " + self.name
    if __diag__.enable_debug_on_named_expressions:
        self.set_debug()
    return self

1898

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined name when one has been
    set, otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is not None:
        return custom
    return self.default_name

1903

def __str__(self) -> str:
    """Return this expression's ``name``."""
    return self.name

1906

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)

1909

def streamline(self) -> "ParserElement":
    """
    Mark this expression as streamlined and discard the cached default name,
    so that ``default_name`` regenerates it on next access.

    Returns ``self`` so that calls can be chained.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1914

def recurse(self) -> List["ParserElement"]:
    """Return the sub-expressions to descend into; this implementation has none."""
    no_children: List["ParserElement"] = []
    return no_children

1917

def _checkRecursion(self, parseElementList):
    """
    Propagate the recursion check to every expression returned by ``recurse()``.

    Each child receives a new list made of ``parseElementList`` followed by
    ``self``; the caller's list is not modified.
    """
    trail = parseElementList + [self]
    for child in self.recurse():
        child._checkRecursion(trail)

1922

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Deprecated: emits a ``DeprecationWarning`` and then runs
    ``_checkRecursion`` starting from an empty list. ``validateTrace`` is
    accepted but not used here.
    """
    deprecation_text = (
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    )
    # stacklevel=2 attributes the warning to the caller of validate()
    warnings.warn(deprecation_text, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

1933

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    ``encoding`` is only used when the file is opened here. ``parse_all``
    (or its legacy spelling ``parseAll``) is forwarded to ``parse_string``;
    the two flags are OR-ed together.
    """
    parse_entire_input = parseAll or parse_all
    try:
        # Try the argument as a file object first ...
        text = typing.cast(TextIO, file_or_filename).read()
    except AttributeError:
        # ... and fall back to treating it as a path to open.
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as source:
            text = source.read()

    try:
        return self.parse_string(text, parse_entire_input)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1963

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object: equal
    - a string: equal when this expression matches the whole string
    - another ``ParserElement``: equal when both have equal instance attributes
    - anything else: not equal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

1972

def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)

1975

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:

    - ``test_string`` - to test against this expression for a match
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests

    ``parseAll`` is the legacy spelling of ``parse_all``; the two flags are
    AND-ed together. Returns ``True`` when parsing succeeds and ``False``
    when a ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parse_string(str(test_string), parse_all=parseAll and parse_all)
    except ParseBaseException:
        return False
    return True

1999

def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The keyword-only camelCase arguments (``parseAll``, ``fullDump``,
    ``printResults``, ``failureTests``, ``postParse``) are legacy spellings that
    are combined with their snake_case counterparts.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # Merge legacy camelCase keyword arguments with their snake_case forms.
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse

    if isinstance(tests, str_type):
        # A single multiline string: one stripped test per line.
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]

    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)

    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: List[Tuple[str, Union[ParseResults, Exception]]] = []
    comments: List[str] = []
    success = True
    # Converts a literal backslash-n in a test string into a real newline,
    # except inside quoted strings.
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"

    for t in tests:
        # Comment lines (and blank lines following a comment) are collected
        # and emitted as a header for the next real test.
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue

        out = [
            "\n" + "\n".join(comments) if comments else "",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            out.append(pe.explain())
            out.append("FAIL: " + str(pe))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # A failure only counts as success when failures are expected.
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # No post-parse output: fall back to the normal dump,
                        # honoring the requested dump detail.
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        # Trailing empty entry makes the joined output end with a newline.
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2198

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` (chained to the ``ImportError``) when the diagram
    support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    diagram = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(diagram, embed=embed, **kwargs))
        return

    # A path was given: create/overwrite the file as UTF-8.
    with open(output_html, "w", encoding="utf-8") as diag_file:
        diag_file.write(railroad_to_html(diagram, embed=embed, **kwargs))

2253

# Compatibility synonyms
# Pre-PEP8 camelCase spellings of the snake_case API defined on this class.
# Each stub below has an empty (``...``) body and is wrapped by
# ``replaced_by_pep8`` together with its snake_case counterpart; presumably the
# decorator makes the old name forward to the new one -- confirm in ``.util``.
# Stubs for static methods are additionally wrapped in ``staticmethod``.
# fmt: off
@staticmethod
@replaced_by_pep8(inline_literals_using)
def inlineLiteralsUsing(): ...

@staticmethod
@replaced_by_pep8(set_default_whitespace_chars)
def setDefaultWhitespaceChars(): ...

@replaced_by_pep8(set_results_name)
def setResultsName(self): ...

@replaced_by_pep8(set_break)
def setBreak(self): ...

@replaced_by_pep8(set_parse_action)
def setParseAction(self): ...

@replaced_by_pep8(add_parse_action)
def addParseAction(self): ...

@replaced_by_pep8(add_condition)
def addCondition(self): ...

@replaced_by_pep8(set_fail_action)
def setFailAction(self): ...

@replaced_by_pep8(try_parse)
def tryParse(self): ...

@staticmethod
@replaced_by_pep8(enable_left_recursion)
def enableLeftRecursion(): ...

@staticmethod
@replaced_by_pep8(enable_packrat)
def enablePackrat(): ...

@replaced_by_pep8(parse_string)
def parseString(self): ...

@replaced_by_pep8(scan_string)
def scanString(self): ...

@replaced_by_pep8(transform_string)
def transformString(self): ...

@replaced_by_pep8(search_string)
def searchString(self): ...

@replaced_by_pep8(ignore_whitespace)
def ignoreWhitespace(self): ...

@replaced_by_pep8(leave_whitespace)
def leaveWhitespace(self): ...

@replaced_by_pep8(set_whitespace_chars)
def setWhitespaceChars(self): ...

@replaced_by_pep8(parse_with_tabs)
def parseWithTabs(self): ...

@replaced_by_pep8(set_debug_actions)
def setDebugActions(self): ...

@replaced_by_pep8(set_debug)
def setDebug(self): ...

@replaced_by_pep8(set_name)
def setName(self): ...

@replaced_by_pep8(parse_file)
def parseFile(self): ...

@replaced_by_pep8(run_tests)
def runTests(self): ...

# Plain aliases: these old names are bound directly to the same objects as
# the new names, without going through replaced_by_pep8.
canParseNext = can_parse_next
resetCache = reset_cache
defaultName = default_name
# fmt: on

2336

2337

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        # expression that precedes the '...'
        self.anchor = expr
        # selects which form of expression __add__ builds
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # Render as "<anchor> ..." by naming a temporary Empty() as "...".
        return str(self.anchor + Empty()).replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        # The skipped text is collected under the results name "_skipped".
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor was found but nothing was skipped: drop the empty token
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor was not found: report it as missing in "_skipped"
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = "missing <" + repr(self.anchor) + ">"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # A placeholder must be replaced (via __add__) before parsing.
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2377

2378

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self):
        # Tokens are created with savelist=False.
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        """Default name of a token is the name of its concrete class."""
        cls = type(self)
        return cls.__name__

2389

2390

class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail at ``loc`` with this token's error message."""
        raise ParseException(instring, loc, self.errmsg, self)

2404

2405

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Subclasses are always instantiated as themselves.
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        match_string = matchString or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)

        # Default behavior
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        # ``matchString`` is the legacy spelling and wins when non-empty.
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        # slice (not index) so an empty string yields "" instead of raising
        self.firstMatchChar = text[:1]
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # Cheap first-character comparison before the full startswith check.
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2457

2458

class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString=""):
        # Both arguments are accepted for signature compatibility with
        # Literal but ignored: the match string is always "".
        super().__init__("")
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, doActions=True):
        """Match without consuming input and without producing tokens."""
        no_tokens = []
        return loc, no_tokens

2474

2475

class _SingleCharLiteral(Literal):
    # Literal specialization for one-character match strings: a single
    # character comparison replaces the startswith() call.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2481

2482

# Install Literal as ParserElement's ``_literalStringClass``. This is done here,
# after the class statement, because Literal is defined later in the module
# than ParserElement. Presumably this is the class used to convert plain
# strings into expressions -- confirm against ParserElement.
ParserElement._literalStringClass = Literal

2484

2485

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        """
        Build a keyword matcher.

        ``matchString`` / ``identChars`` are the legacy spellings of
        ``match_string`` / ``ident_chars`` and win when truthy.

        Raises ``ValueError`` if the keyword string is empty.
        """
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            # ``from None``: the IndexError is an implementation detail and
            # should not show up as a chained traceback.
            raise ValueError(
                "null string passed to Keyword; use Empty() instead"
            ) from None
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparison is done on upper-cased text, so the
            # identifier characters are upper-cased as well.
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at ``loc``, requiring that it is neither preceded
        nor followed by one of the identifier characters.

        Returns ``(end_location, keyword)`` on success; otherwise raises
        ``ParseException`` whose message and location say whether the text
        did not match or the keyword boundary check failed.
        """
        errmsg = self.errmsg
        errloc = loc
        match_end = loc + self.matchLen
        ident_chars = self.identChars

        if self.caseless:
            if instring[loc:match_end].upper() == self.caselessmatch:
                if loc != 0 and instring[loc - 1].upper() in ident_chars:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
                elif (
                    match_end < len(instring)
                    and instring[match_end].upper() in ident_chars
                ):
                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = match_end
                else:
                    return match_end, self.match
            # else no match just raise plain exception

        else:
            if (
                instring[loc] == self.firstMatchChar
                and self.matchLen == 1
                or instring.startswith(self.match, loc)
            ):
                if loc != 0 and instring[loc - 1] in ident_chars:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
                elif match_end < len(instring) and instring[match_end] in ident_chars:
                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = match_end
                else:
                    return match_end, self.match
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonym (pre-PEP8 name)
    setDefaultKeywordChars = set_default_keyword_chars

2602

2603

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        original = matchString or match_string
        # The parent stores the upper-cased form, used for comparison.
        super().__init__(original.upper())
        # Preserve the defining literal.
        self.returnString = original
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2629

2630

class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # Legacy camelCase arguments win when truthy; matching is always caseless.
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2654

2655

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # ``max_mismatches`` takes precedence over the legacy ``maxMismatches``
        # whenever it is given (including 0).
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, doActions=True):
        start = loc
        target = self.match_string
        maxloc = start + len(target)

        # Not enough input left to hold the whole match string.
        if maxloc > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        allowed = self.maxMismatches
        mismatches = []
        position = 0
        for position, (src, mat) in enumerate(zip(instring[loc:maxloc], target)):
            if self.caseless:
                src, mat = src.lower(), mat.lower()

            if src != mat:
                mismatches.append(position)
                if len(mismatches) > allowed:
                    # too many differences: give up on this location
                    break
        else:
            # Loop finished without exceeding the mismatch budget.
            loc = start + position + 1
            results = ParseResults([instring[start:loc]])
            results["original"] = target
            results["mismatches"] = mismatches
            return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2741

2742

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match; 0 means no
      maximum (default=0)
    - ``exact`` - exact number of characters to match; 0 means no
      exact length is imposed, a value > 0 overrides ``min`` and
      ``max`` (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        """Build the character sets, length limits and (when possible) an
        equivalent compiled regex for this word.

        Raises:
            ValueError: if the initial character string is empty, if
                ``min < 1``, or if ``max > 0`` and ``min > max``.
        """
        # the camelCase keyword forms win whenever they are truthy
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        # excluded characters are removed from both the initial and body sets
        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no (remaining) body characters: body set is the initial set
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # records only an explicit ``max`` argument; ``exact`` does not set it
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # ``exact`` overrides both limits; the locals min/max are updated
            # too because the regex construction below reads them
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            # fragment matching the first character
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                # one character class for the whole word; the repeat count
                # covers every character including the first
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                # separate class for the body; the repeat count applies to the
                # body only, hence the "- 1" adjustments below
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # regex could not be compiled; keep the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                # compiled fine: route parsing through the regex implementation
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[assignment]

    def _generateDefaultName(self) -> str:
        """Return a compact default name such as ``W:(0-9)`` or ``W:(A-Z, a-z){2,5}``."""

        def charsAsStr(s):
            # collapse to range notation and truncate long sets with "..."
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)
            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."
            else:
                return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single-character word: drop the leading "W:" prefix
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, doActions=True):
        """Character-by-character matcher, used unless ``__init__`` installed
        ``parseImpl_regex`` in its place.

        Returns ``(end_loc, matched_text)``; raises ``ParseException`` when the
        first character is not an initial character, the match is shorter than
        ``minLen``, an explicitly given ``max`` is followed by another body
        character, or (keyword mode) a body character adjoins the match.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # word continues past the explicit maximum length
            throwException = True
        elif self.asKeyword:
            # keyword mode: no body character may touch either end of the match
            if (
                start > 0
                and instring[start - 1] in bodychars
                or loc < instrlen
                and instring[loc] in bodychars
            ):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, doActions=True):
        """Regex-based matcher installed by ``__init__`` when the word could be
        compiled to a regular expression.

        Returns ``(end_loc, matched_text)``; raises ``ParseException`` if the
        compiled pattern does not match at ``loc``.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

2994

2995

class Char(Word):
    """Convenience form of :class:`Word` ``(characters, exact=1)`` for
    matching exactly one character drawn from a given string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # the camelCase keyword forms take precedence when truthy
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset,
            exact=1,
            as_keyword=keyword_mode,
            exclude_chars=excluded,
        )

3016

3017

class Regex(Token):
    r"""Token that matches text using a regular expression.

    The expression is given either as a pattern string in the syntax of the
    stdlib Python `re module <https://docs.python.org/3/library/re.html>`_,
    or as an already-compiled pattern object. Named groups in the pattern
    (``(?P<name>...)``) are preserved as named :class:`ParseResults`.

    To use a different RE engine (such as the ``regex`` module), compile the
    pattern with that engine and pass the compiled object.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """Store the pattern; a string pattern is compiled lazily on first use
        with ``re.compile(pattern, flags)``. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ documentation
        for the accepted patterns and flags.

        Raises:
            ValueError: if ``pattern`` is an empty string.
            TypeError: if ``pattern`` is neither a string nor an object with
                ``pattern`` and ``match`` attributes.
        """
        super().__init__()
        group_list_mode = asGroupList or as_group_list
        match_mode = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")
            compiled = None
            pattern_text = pattern
        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # duck-typed compiled pattern (stdlib re, regex module, ...)
            compiled = pattern
            pattern_text = pattern.pattern
        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self._re = compiled
        self.pattern = self.reString = pattern_text
        self.flags = flags

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asGroupList = group_list_mode
        self.asMatch = match_mode
        # pick the result flavour; as_match takes precedence over as_group_list
        if group_list_mode:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [assignment]
        if match_mode:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [assignment]

    @cached_property
    def re(self):
        """Compiled pattern object; compiled from ``self.pattern`` on first
        access when no precompiled object was supplied.

        Raises:
            ValueError: if the pattern string cannot be compiled.
        """
        if self._re:
            return self._re

        try:
            return re.compile(self.pattern, self.flags)
        except re.error:
            raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled pattern."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """Whether the pattern matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self) -> str:
        pattern_repr = repr(self.pattern).replace("\\\\", "\\")
        return "Re:({})".format(pattern_repr)

    def parseImpl(self, instring, loc, doActions=True):
        """Default matcher: return the whole matched text as ``ParseResults``,
        with every named group added as a named result.
        """
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        tokens = ParseResults(found.group())
        for group_name, group_value in found.groupdict().items():
            tokens[group_name] = group_value
        return found.end(), tokens

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Matcher used when ``as_group_list`` is set: return the tuple of groups."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.groups()

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Matcher used when ``as_match`` is set: return the match object itself."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``as_group_list=True``, or with a
        callable ``repl`` together with ``as_match=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch:
            if callable(repl):
                raise TypeError(
                    "cannot use sub() with a callable with Regex(as_match=True)"
                )

            def pa(tokens):
                # token is a match object: expand the template against it
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                # token is the matched text: re-run the substitution on it
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3171

3172

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    # maps two-character escape sequences (backslash + letter) to the
    # whitespace character they stand for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        """Normalize the arguments and compile the matching regex (plus the
        unquoting scanner when results are to be unquoted).

        Raises:
            ValueError: if the opening or closing quote string is empty after
                stripping whitespace, or if the assembled pattern fails to
                compile.
        """
        super().__init__()
        # camelCase keyword forms win when truthy; the boolean options (which
        # default to True) are combined with ``and`` so that a False given in
        # either spelling disables the feature
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern = []

        # alternative: the escaped-quote sequence
        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        # alternative: the escape character followed by any one character
        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        # alternative (multi-character end quote only): a proper prefix of the
        # end quote that is NOT followed by the remainder of the end quote
        if len(self.end_quote_char) > 1:
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        # alternative: any single character other than the first end-quote
        # character and the escape character (and, unless multiline, other
        # than newline / carriage return)
        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
            )
        else:
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
            )

        # full pattern: opening quote, zero or more inner alternatives, closing quote
        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        # scanner used by parseImpl to unescape the text between the quotes
        if self.unquote_results:
            if self.convert_whitespace_escapes:
                # groups: 1 = whitespace escape, 2 = escaped character, 3 = any character
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                # groups: 1 = escaped character, 2 = any character
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self) -> str:
        """Return a descriptive default name based on the quote delimiters."""
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``.

        Returns ``(end_loc, text)``. When ``unquote_results`` is set the
        delimiters are stripped and escapes are resolved; otherwise the raw
        matched text (including delimiters) is returned. Raises
        ``ParseException`` when no quoted string starts at ``loc``.
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            instring[loc] == self.first_quote_char
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped characters
                        else match.group(2)[-1] if match.group(2)
                        # match group 3 matches any character
                        else match.group(3)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3392

3393

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.

    Whitespace is part of the match unless it is listed in the exclusion
    set (see example). Construct with a string holding every disallowed
    character plus optional ``min``, ``max`` and/or ``exact`` lengths.
    ``min`` defaults to 1 (values < 1 are rejected); ``max`` and ``exact``
    default to 0, meaning no maximum / no exact length.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        # whitespace may itself be matchable text, so never skip it
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both limits
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = self.minLen == 0

    def _generateDefaultName(self) -> str:
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until an excluded character, the
        maximum length, or the end of input; return ``(end_loc, text)``.
        """
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3472

3473

class White(Token):
    """Token for matching whitespace explicitly.

    pyparsing grammars normally skip over whitespace; use this class when
    particular whitespace is significant. Construct with a string of the
    whitespace characters to match (default ``" \\t\\r\\n"``), plus optional
    ``min``, ``max`` and ``exact`` lengths as defined for :class:`Word`.
    """

    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # skip only those known whitespace characters that are NOT being matched
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        # self.leave_whitespace()
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length overrides both limits
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self) -> str:
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        return "".join(labels)

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters starting at
        ``loc``; return ``(end_loc, text)``.
        """
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3549

3550

class PositionToken(Token):
    """Base class for the position-oriented tokens defined below it.

    Instances are flagged as possibly returning empty and as not raising
    ``IndexError``.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

3556

3557

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; handy when
    scraping tabular reports.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        """Skip ignorable expressions and whitespace until the target column
        is reached (or a non-space character / end of input stops the scan).
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column; fail when
        ``loc`` is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        end = loc + self.col - current
        return end, instring[loc:end]

3587

3588

class LineStart(PositionToken):
    r"""Matches when the current position is at the start of a line of
    the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed below
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper used to skip whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)
        if "\n" in self.orig_whiteChars:
            # newline was skippable originally: step over each newline and
            # whatever non-newline whitespace follows it
            while instring[pos : pos + 1] == "\n":
                pos = self.skipper.preParse(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3634

3635

class LineEnd(PositionToken):
    """Matches when the current position is at the end of a line of the
    parse string.
    """

    def __init__(self):
        super().__init__()
        # newline must not be skipped as whitespace, or it could never match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returned as ``"\\n"``) or the exact end
        of the input (returned as an empty list); both advance ``loc`` by one.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3657

3658

class StringStart(PositionToken):
    """Matches when the current position is at the start of the parse
    string.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # a non-zero loc still counts as the start when everything before it
        # is just what preParse skips from position 0
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3674

3675

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed only at or beyond the end of ``instring``.

        Returns ``(loc + 1, [])`` when ``loc`` is exactly the end of the
        input and ``(loc, [])`` when it is already past the end.

        Raises:
            ParseException: if unparsed input remains at ``loc``.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end: the three comparisons are exhaustive, so no further
        # fallback branch is needed
        return loc, []

3694

3695

class WordStart(PositionToken):
    r"""Matches when the current position is at the beginning of a
    :class:`Word` and is not preceded by any character from a given set
    of ``word_chars`` (default= ``printables``). To emulate the ``\b``
    behavior of regular expressions, use ``WordStart(alphanums)``.
    ``WordStart`` also matches at the beginning of the string being
    parsed, or at the beginning of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the legacy keyword is honoured only when it differs from the default
        if wordChars == printables:
            wordChars = word_chars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        chars = self.wordChars
        # position 0 always qualifies; elsewhere the previous character must
        # be a non-word character and the current one a word character
        if loc != 0 and (instring[loc - 1] in chars or instring[loc] not in chars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3720

3721

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the pre-PEP8 keyword wins only when it was given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (consuming nothing) when ``loc`` is at the end of a word.

        Inside the string, the character at ``loc`` must not be a word
        character and the character before ``loc`` must be one. Positions
        at or past the end of the string always match.

        Raises:
            ParseException: if ``loc`` is inside the string and is not at
                the end of a word. This includes offset 0, which has no
                preceding character.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # offset 0 has no preceding character; without this check
                # instring[loc - 1] would wrap around to the LAST character
                # of the string and could report a bogus word end
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3747

3748

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement for elements that combine a list
    of sub-expressions, kept in ``self.exprs``.
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        # a generator can be consumed only once, so materialize it up front
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # strings in the sequence are wrapped as literals, everything
            # else is kept as given
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in list(exprs)
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all: treat it as one expression
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> List[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
           all contained expressions.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        # the recursive calls are made on copies of the contained expressions
        self.exprs = [sub_expr.copy() for sub_expr in self.exprs]
        for sub_expr in self.exprs:
            sub_expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
           all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        # the recursive calls are made on copies of the contained expressions
        self.exprs = [sub_expr.copy() for sub_expr in self.exprs]
        for sub_expr in self.exprs:
            sub_expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable on this element and on every
        contained expression; a ``Suppress`` that is already registered is
        not added a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for sub_expr in self.exprs:
            sub_expr.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        cls_name = self.__class__.__name__
        return f"{cls_name}:({str(self.exprs)})"

    def streamline(self) -> ParserElement:
        """Streamline the contained expressions and flatten a directly
        nested expression of the same class into this one.
        """
        if self.streamlined:
            return self

        super().streamline()

        for sub_expr in self.exprs:
            sub_expr.streamline()

        def can_merge(candidate) -> bool:
            # only plain nested expressions of our own class may be merged:
            # no parse actions, no results name, no debugging
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        def absorb_flags(merged) -> None:
            self._defaultName = None
            self.mayReturnEmpty |= merged.mayReturnEmpty
            self.mayIndexError |= merged.mayIndexError

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            leading = self.exprs[0]
            if can_merge(leading):
                self.exprs = leading.exprs[:] + [self.exprs[1]]
                absorb_flags(leading)

            # checked after the leading merge, against the updated list
            trailing = self.exprs[-1]
            if can_merge(trailing):
                self.exprs = self.exprs[:-1] + trailing.exprs[:]
                absorb_flags(trailing)

        self.errmsg = "Expected " + str(self)

        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        base_trace = [] if validateTrace is None else validateTrace
        trace = base_trace[:] + [self]
        for sub_expr in self.exprs:
            sub_expr.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy whose contained expressions are themselves copies."""
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [sub_expr.copy() for sub_expr in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        # optionally warn when a results name is put on a collection whose
        # members already carry results names of their own
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for sub_expr in self.exprs:
                if not (isinstance(sub_expr, ParserElement) and sub_expr.resultsName):
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in sub_expr.suppress_warnings_
                ):
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        sub_expr.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on

3920

3921

class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: once :meth:`And.parseImpl` reaches one, a failure
        of any later expression is raised as ``ParseSyntaxException``.
        Its default name is ``"-"``.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        """Build the sequence from ``exprs_arg``.

        Each ``Ellipsis`` item is replaced by a ``SkipTo`` aimed at the item
        that follows it, with results name ``"_skipped*"``. A sequence that
        ends in ``Ellipsis`` raises ``Exception``. Whitespace settings are
        taken from the first expression, unless that expression is a
        ``White``, in which case whitespace skipping is turned off.
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is Ellipsis:
                    if i < len(exprs) - 1:
                        # adding to Empty() coerces the next item (possibly a
                        # plain string) into a ParserElement; take it back out
                        # of the resulting expression
                        skipto_arg: ParserElement = typing.cast(
                            ParseExpression, (Empty() + exprs[i + 1])
                        ).exprs[-1]
                        tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    else:
                        raise Exception(
                            "cannot construct And with sequence ending in ..."
                        )
                else:
                    tmp.append(expr)
            exprs[:] = tmp
        super().__init__(exprs, savelist)
        if self.exprs:
            # the sequence can be empty only if every member can be empty
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Merge pending skips into their following expression, run the base
        class streamlining, then link any ``IndentedBlock`` to the expression
        that precedes it. Returns ``self``.
        """
        # collapse any _PendingSkip's
        if self.exprs:
            if any(
                isinstance(e, ParseExpression)
                and e.exprs
                and isinstance(e.exprs[-1], _PendingSkip)
                for e in self.exprs[:-1]
            ):
                # a fresh object used purely as an identity-compared sentinel
                # for slots that have been merged away
                deleted_expr_marker = NoMatch()
                for i, e in enumerate(self.exprs[:-1]):
                    if e is deleted_expr_marker:
                        continue
                    if (
                        isinstance(e, ParseExpression)
                        and e.exprs
                        and isinstance(e.exprs[-1], _PendingSkip)
                    ):
                        # combine the pending skip with the expression that
                        # follows e, and mark that following slot for removal
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        self.exprs[i + 1] = deleted_expr_marker
                self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # when prev matches, store col(l, s) in the block's
                    # parent_anchor (presumably the column of prev's match
                    # location - confirm against col()); cur_=cur binds the
                    # current block now rather than at call time
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                # descend into the first sub-expression, if there is one
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        # membership may have changed above, so recompute
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order, starting at ``loc``.

        Returns the final location and the accumulated results. After an
        ``_ErrorStop`` marker has been passed, a failure of a later
        expression is raised as ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # if isinstance(e, And._ErrorStop):
            # exact type check: subclasses of _ErrorStop do not count
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    # convert any other parse failure into a syntax exception
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    # running off the end of the input is reported at the end
                    # of the string
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Implement ``+=``: append ``other`` in place (strings are first
        converted with ``_literalStringClass``). Returns ``NotImplemented``
        for anything that is not a ``ParserElement``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop after the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the slice with step len(inner) - 1 picks the first and last characters)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return "{" + inner + "}"

4083

4084

class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if self.exprs:
            # can match empty if any alternative can; skips whitespace only
            # if every alternative does
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self.mayReturnEmpty = True

    def streamline(self) -> ParserElement:
        """Run the base class streamlining, then recompute the flags that
        are derived from the alternatives. Returns ``self``.
        """
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            # a White alternative also turns whitespace skipping off
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative at ``loc`` and return the result of the
        one that matches the most input.

        A first pass uses ``try_parse`` to collect the end location of each
        alternative that matches; a second pass re-parses the candidates
        from longest to shortest. If nothing matches, a collected fatal
        exception is raised in preference to the ordinary exception that
        occurred furthest into the input.
        """
        maxExcLoc = -1
        maxException = None
        matches = []
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # once a fatal error is seen, ordinary failures are discarded
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    # remember the failure that got furthest into the input
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                # ran off the end of the input: report at the end of string
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, doActions)

            # (end location, tokens) of the best re-parsed match so far;
            # (-1, None) means "none yet"
            longest = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    # matched at least as much as in the first pass: use it
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie at the top, prefer the
                # element with the longer string representation
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            if maxExcLoc == loc:
                maxException.msg = self.errmsg
            raise maxException
        else:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

    def __ixor__(self, other):
        """Implement ``^=``: append ``other`` in place (strings are first
        converted with ``_literalStringClass``). Returns ``NotImplemented``
        for anything that is not a ``ParserElement``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return "{" + " ^ ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        # optionally warn when a results name is put on an alternation that
        # contains an And alternative
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

4243

4244

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. When
    more than one alternative could match, the one listed first wins.
    May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
            return
        # can match empty if any alternative can; skips whitespace only if
        # every alternative does
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
        self.skipWhitespace = all(alt.skipWhitespace for alt in self.exprs)

    def streamline(self) -> ParserElement:
        """Run the base class streamlining, then recompute the flags that
        are derived from the alternatives. Returns ``self``.
        """
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self

        self.saveAsList = any(alt.saveAsList for alt in self.exprs)
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
        # a White alternative also turns whitespace skipping off
        self.skipWhitespace = all(
            alt.skipWhitespace and not isinstance(alt, White) for alt in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at ``loc``.

        When every alternative fails, the failure that occurred furthest
        into the input is re-raised; a fatal exception from an alternative
        is propagated immediately.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(
                    instring,
                    loc,
                    doActions,
                )
            except ParseFatalException as fatal:
                fatal.__traceback__ = None
                fatal.parser_element = alt
                raise
            except ParseException as failure:
                if failure.loc > furthest_loc:
                    furthest_exc = failure
                    furthest_loc = failure.loc
            except IndexError:
                # ran off the end of the input: report at the end of string
                end = len(instring)
                if end > furthest_loc:
                    furthest_exc = ParseException(instring, end, alt.errmsg, self)
                    furthest_loc = end

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # if the furthest failure is at the current position, infer that all
        # alternatives failed right here and emit the collective error message
        # instead of any individual one
        if furthest_loc == loc:
            furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        """Implement ``|=``: append ``other`` in place (strings are first
        converted with ``_literalStringClass``). Returns ``NotImplemented``
        for anything that is not a ``ParserElement``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " | ".join(str(alt) for alt in self.exprs)
        return "{" + joined + "}"

    def _setResultsName(self, name, listAllMatches=False):
        # optionally warn when a results name is put on an alternation that
        # contains an And alternative
        warning_enabled = (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        )
        if warning_enabled and any(
            isinstance(alt, And)
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in alt.suppress_warnings_
            for alt in self.exprs
        ):
            warnings.warn(
                "{}: setting results name {!r} on {} expression "
                "will return a list of all parsed tokens in an And alternative, "
                "in prior versions only the first token was returned; enclose "
                "contained argument in Group".format(
                    "warn_multiple_tokens_in_named_alternation",
                    name,
                    type(self).__name__,
                ),
                stacklevel=3,
            )

        return super()._setResultsName(name, listAllMatches)

4359

4360

class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        if self.exprs:
            # the whole set can be empty only if every member can be empty
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        # the required/optional groupings are built lazily, on the first
        # call to parseImpl
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``&=``: append ``other`` in place (strings are first
        converted with ``_literalStringClass``). Returns ``NotImplemented``
        for anything that is not a ``ParserElement``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Each([self, other])

    def streamline(self) -> ParserElement:
        """Run the base class streamlining, then recompute
        ``mayReturnEmpty`` from the contained expressions. Returns ``self``.
        """
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order they occur.

        A trial phase repeatedly attempts the remaining expressions with
        ``try_parse`` to discover a working order. The expressions are then
        parsed for real in that order, starting again from ``loc``.

        Raises a collected ``ParseFatalException`` if the final trial round
        produced one, or ``ParseException`` if required expressions remain
        unmatched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            # maps id of an Opt's inner expression back to the Opt itself
            self.opt1map = dict(
                (id(e.expr), e) for e in self.exprs if isinstance(e, Opt)
            )
            # inner expressions of Opt elements
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            # other elements that can match empty
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            # inner expressions of repetition elements, renamed so that all
            # matches are kept under the results name
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        # work on copies so the cached groupings survive this call
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        multis = self.multioptionals[:]
        matchOrder = []

        keepMatching = True
        failed = []
        fatals = []
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + multis
            # failures are tracked per round
            failed.clear()
            fatals.clear()
            for e in tmpExprs:
                try:
                    tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the owning Opt, if e is the inner expression of one
                    matchOrder.append(self.opt1map.get(id(e), e))
                    # required and optional expressions match at most once;
                    # repetition expressions stay available
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a full round matches nothing
            if len(failed) == len(tmpExprs):
                keepMatching = False

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie at the top, prefer the
                # element with the longer string representation
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if tmpReqd:
            missing = ", ".join([str(e) for e in tmpReqd])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt]

        # parse for real, in the discovered order, starting from the
        # original location
        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        return "{" + " & ".join(str(e) for e in self.exprs) + "}"

4533

4534

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement` for elements that wrap
    a single expression, kept in ``self.expr``.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # convert a bare string into a parser element
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        wrapped = typing.cast(ParserElement, expr)
        self.expr = wrapped
        if wrapped is None:
            return

        # take over the parsing characteristics of the wrapped expression
        self.mayIndexError = wrapped.mayIndexError
        self.mayReturnEmpty = wrapped.mayReturnEmpty
        self.set_whitespace_chars(
            wrapped.whiteChars, copy_defaults=wrapped.copyDefaultWhiteChars
        )
        self.skipWhitespace = wrapped.skipWhitespace
        self.saveAsList = wrapped.saveAsList
        self.callPreparse = wrapped.callPreparse
        self.ignoreExprs.extend(wrapped.ignoreExprs)

    def recurse(self) -> List[ParserElement]:
        """Return the wrapped expression in a list, or an empty list if
        there is none.
        """
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate parsing to the wrapped expression.

        A failure is re-raised with this element's error message, except
        for a ``Forward`` that has no custom name. With no wrapped
        expression, ``ParseException`` is raised.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        except ParseBaseException as pbe:
            if not isinstance(self, Forward) or self.customName is not None:
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # the recursive call is made on a copy of the wrapped expression
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # the recursive call is made on a copy of the wrapped expression
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable on this element and on the
        wrapped expression; a ``Suppress`` that is already registered is
        not added a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        super().streamline()
        wrapped = self.expr
        if wrapped is not None:
            wrapped.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        # seeing ourselves again on the current path means the grammar is
        # recursive
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        path = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(path)

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        base_trace = [] if validateTrace is None else validateTrace
        trace = base_trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        cls_name = self.__class__.__name__
        return f"{cls_name}:({str(self.expr)})"

    # Compatibility synonyms
    # fmt: off
    @replaced_by_pep8(leave_whitespace)
    def leaveWhitespace(self): ...

    @replaced_by_pep8(ignore_whitespace)
    def ignoreWhitespace(self): ...
    # fmt: on

4644

4645

class IndentedBlock(ParseElementEnhance):
    """
    Expression that matches one or more occurrences of an expression, all at
    the same indentation level. Useful for parsing text where structure is
    implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        # matches nothing itself; succeeds only at exactly column ref_col
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # matches nothing itself; succeeds only at a column beyond ref_col
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column an optional trailing un-indent is matched against
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        """Parse a block of lines that share the indentation found at ``loc``.

        The block grammar is built on every call, because it depends on the
        column at which the first item of the block starts.
        """
        # skip ahead to the first non-whitespace position by pre-parsing with
        # an Empty(); its column is the reference for all following lines
        start_loc = Empty().preParse(instring, loc)

        # check that the wrapped expression matches here at all - if it does
        # not, this raises and nothing further needs to be built
        self.expr.try_parse(instring, start_loc, do_actions=doActions)

        block_col = col(start_loc, instring)
        same_indent = self._Indent(block_col)

        item_expr = Empty() + same_indent + self.expr
        if self._recursive:
            # allow a more deeply indented block of the same kind after an item
            deeper_indent = self._IndentGreater(block_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = block_col
            item_expr += Opt(deeper_indent + child_block)

        item_expr.set_name(f"inner {hex(id(item_expr))[-4:].upper()}@{block_col}")
        items = OneOrMore(item_expr)

        # after the block: back at the parent's column, or at end of input
        closing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(items) if self._grouped else items
        return (body + Optional(closing_undent)).parseImpl(
            instring, start_loc, doActions
        )

4708

4709

class AtStringStart(ParseElementEnhance):
    """Matches the wrapped expression only at the very beginning of the
    parse string (location 0)::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")

4729

4730

class AtLineStart(ParseElementEnhance):
    r"""Matches the wrapped expression only when the current location is in
    column 1, i.e. at the beginning of a line of the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + rest_of_line).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        at_line_start = col(loc, instring) == 1
        if at_line_start:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")

4762

4763

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string; it only verifies that the specified parse
    expression matches at the current position. ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # Parse with self.expr._parse, then empty the token list in place:
        # the positional tokens are dropped, while any named results defined
        # inside the lookahead expression stay on the returned ParseResults.
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead[:]

        # hand back the original location - a lookahead consumes nothing
        return loc, lookahead

4798

4799

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position. ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        # work on a copy of the wrapped expression that does not skip whitespace
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # ``exact`` is set when the lookbehind distance can be derived from the
        # expression itself; in that case it replaces any ``retreat`` passed in
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # a lookbehind contributes no positional tokens: empty the list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Verify that the wrapped expression matches just before ``loc``.

        Returns ``(loc, tokens)`` with ``loc`` unchanged; raises a
        ``ParseBaseException`` when no lookbehind match is found.
        """
        if self.exact:
            # known distance: step back exactly that far and parse once
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat) : loc]
            window = len(instring_slice)
            last_expr = ParseException(instring, loc, self.errmsg)
            # Try every start position inside the window, nearest first.
            # The bound is the window length: the previous bound of
            # ``min(loc, retreat + 1)`` ran one step too far whenever
            # ``loc > retreat``, producing a start index of -1.
            for offset in range(1, window + 1):
                try:
                    _, ret = test_expr._parse(instring_slice, window - offset)
                except ParseBaseException as pbe:
                    last_expr = pbe
                else:
                    break
            else:
                # no start position matched (or the window was empty)
                raise last_expr
        return loc, ret

4880

4881

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        begin = loc
        end, matched = self.expr._parse(instring, begin, doActions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        for key, value in (
            ("locn_start", begin),
            ("value", matched),
            ("locn_end", end),
        ):
            located[key] = value

        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]

4922

4923

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # Set the flag directly instead of calling self.leave_whitespace():
        # that call would propagate the setting to the contained expressions.
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = f"Found unwanted token, {self.expr!s}"

    def parseImpl(self, instring, loc, doActions=True):
        unwanted_matches = self.expr.can_parse_next(
            instring, loc, do_actions=doActions
        )
        if not unwanted_matches:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        return f"~{{{self.expr!s}}}"

4965

4966

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repetition with an optional stop sentinel."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        self.saveAsList = True
        # the camelCase keyword wins when both spellings are supplied
        sentinel = stopOn or stop_on
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that ends the repetition."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated: parsing ``not_ender`` fails exactly where the sentinel matches
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        has_ender = self.not_ender is not None
        if has_ender:
            reject_sentinel = self.not_ender.try_parse
            # must be at least one (but first see if we are the stopOn
            # sentinel; if so, fail)
            reject_sentinel(instring, loc)

        # the mandatory first match - a failure here propagates to the caller
        loc, tokens = parse_expr(instring, loc, doActions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if has_ender:
                    reject_sentinel(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, next_loc, doActions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failure after at least one match simply ends the repetition
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        warning_key = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and warning_key not in self.suppress_warnings_
        ):
            for e in [self.expr] + self.expr.recurse():
                if not isinstance(e, ParserElement):
                    continue
                if e.resultsName and warning_key not in e.suppress_warnings_:
                    warnings.warn(
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {e.resultsName!r} on contained expression",
                        stacklevel=3,
                    )

        return super()._setResultsName(name, listAllMatches)

5042

5043

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'posn' as data instead of next label -> [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self) -> str:
        return f"{{{self.expr!s}}}..."

5074

5075

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        try:
            matched = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            # zero matches is a success: same location, empty (named) results
            return loc, ParseResults([], name=self.resultsName)
        return matched

    def _generateDefaultName(self) -> str:
        return f"[{self.expr!s}]..."

5108

5109

class DelimitedList(ParseElementEnhance):
    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ):
        """Helper to define a delimited list of expressions - the delimiter
        defaults to ','. By default, the list elements and delimiters can
        have intervening whitespace, and comments, but this can be
        overridden by passing ``combine=True`` in the constructor. If
        ``combine`` is set to ``True``, the matching tokens are
        returned as a single token string, with the delimiters included;
        otherwise, the matching tokens are returned as a list of tokens,
        with the delimiters suppressed.

        ``min`` and ``max``, when given, bound the number of list elements;
        a ``ValueError`` is raised if ``min`` is less than 1 or if ``max``
        is less than ``min``.

        If ``allow_trailing_delim`` is set to True, then the list may end with
        a delimiter.

        Example::

            DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
            DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")
        if max is not None and min is not None and max < min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters only appear in the output when the result is combined
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one leading element, then (delimiter + element) repeated; the
        # repetition bounds are one less than the element bounds
        most_repeats = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            most_repeats,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        content = self.content.streamline()
        return f"{content} [{self.raw_delim} {content}]..."

5173

5174

class _NullToken:
    """Placeholder object that is falsy and renders as an empty string."""

    def __bool__(self):
        """Always evaluate as false in a boolean context."""
        return False

    def __str__(self):
        """Always render as the empty string."""
        return ""

5181

5182

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:

    - ``expr`` - expression that may match at most once
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # private sentinel meaning "no default supplied" - lets ``None`` (or any
    # other value) be passed as a real default
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: stay at ``loc`` and hand back the default, if any
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif wrapped.resultsName:
                # expose the default under the wrapped expression's results name too
                tokens = ParseResults([fallback])
                tokens[wrapped.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return f"[{inner}]"


Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:

    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        # the camelCase keyword wins when both spellings are supplied
        fail_expr = failOn or fail_on
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        self.failOn = (
            self._literalStringClass(fail_expr)
            if isinstance(fail_expr, str_type)
            else fail_expr
        )
        self.errmsg = f"No match found for {self.expr!s}"
        # helper element whose only job is to carry the combined ignore expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the ignorer from the target's ignore expressions plus ``ignoreExpr``."""
        ignorer = self.ignorer
        ignorer.ignoreExprs.clear()
        for ignorable in self.expr.ignoreExprs:
            ignorer.ignore(ignorable)
        if self.ignoreExpr:
            ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        super().ignore(expr)
        # keep the internal ignorer in step with the newly added expression
        self._update_ignorer()

    def parseImpl(self, instring, loc, doActions=True):
        startloc = loc
        instrlen = len(instring)
        parse_target = self.expr._parse
        fail_on_matches = (
            None if self.failOn is None else self.failOn.canParseNext
        )
        skip_ignored = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        scan_loc = loc
        while scan_loc <= instrlen:
            # stop scanning as soon as the fail_on expression matches here
            # NOTE(review): this break bypasses the loop's else clause, so the
            # code after the loop still runs, using this position as the end
            # of the skipped text - confirm this is intended
            if fail_on_matches is not None and fail_on_matches(instring, scan_loc):
                break

            if skip_ignored is not None:
                # advance past ignore expressions, repeating until one fails
                # to parse or parses without moving the location
                last_loc = scan_loc
                while True:
                    try:
                        scan_loc = skip_ignored(instring, scan_loc)
                    except ParseBaseException:
                        break
                    if scan_loc == last_loc:
                        break
                    last_loc = scan_loc

            try:
                parse_target(instring, scan_loc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                scan_loc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = scan_loc
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            # parse the target for real this time, with the caller's doActions
            loc, target_tokens = parse_target(
                instring, loc, doActions, callPreParse=False
            )
            skipresult += target_tokens

        return loc, skipresult

5410

5411

5412class Forward(ParseElementEnhance):

5413 """

5414 Forward declaration of an expression to be defined later -

5415 used for recursive grammars, such as algebraic infix notation.

5416 When the expression is known, it is assigned to the ``Forward``

5417 variable using the ``'<<'`` operator.

5418

5419 Note: take care when assigning to ``Forward`` not to overlook

5420 precedence of operators.

5421

5422 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

5423

5424 fwd_expr << a | b | c

5425

5426 will actually be evaluated as::

5427

5428 (fwd_expr << a) | b | c

5429

5430 thereby leaving b and c out as parseable alternatives. It is recommended that you

5431 explicitly group the values inserted into the ``Forward``::

5432

5433 fwd_expr << (a | b | c)

5434

5435 Converting to use the ``'<<='`` operator instead will avoid this problem.

5436

5437 See :class:`ParseResults.pprint` for an example of a recursive

5438 parser created using ``Forward``.

5439 """

5440

def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
    """Create the forward declaration, optionally wrapping ``other`` right away."""
    # Record the first of the two innermost stack entries; __del__ reports
    # its filename and line number when no expression was ever attached.
    creation_stack = traceback.extract_stack(limit=2)
    self.caller_frame = creation_stack[0]
    super().__init__(other, savelist=False)  # type: ignore[arg-type]
    # stack entry of the most recent '<<', compared against in __or__
    self.lshift_line = None

5445

def __lshift__(self, other) -> "Forward":
    """Attach ``other`` as the expression this ``Forward`` stands for.

    A string operand is first converted with ``_literalStringClass``.
    Returns ``self``, or ``NotImplemented`` when ``other`` is neither a
    string nor a ``ParserElement``.
    """
    if isinstance(other, str_type):
        other = self._literalStringClass(other)

    if not isinstance(other, ParserElement):
        # Reject before touching any state: ``caller_frame`` must survive a
        # failed ``<<`` because __del__ reads it when ``expr`` is still None.
        return NotImplemented

    # an expression is being attached, so the creation frame is no longer needed
    if hasattr(self, "caller_frame"):
        del self.caller_frame

    self.expr = other
    # mirror the parsing attributes of the attached expression
    self.streamlined = other.streamlined
    self.mayIndexError = self.expr.mayIndexError
    self.mayReturnEmpty = self.expr.mayReturnEmpty
    self.set_whitespace_chars(
        self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
    )
    self.skipWhitespace = self.expr.skipWhitespace
    self.saveAsList = self.expr.saveAsList
    self.ignoreExprs.extend(self.expr.ignoreExprs)
    # remember the calling line, so __or__ can spot ``fwd << a | b``
    self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
    return self

5467

def __ilshift__(self, other) -> "Forward":
    """In-place form of ``<<``; delegates to ``__lshift__`` for parser elements."""
    if isinstance(other, ParserElement):
        return self << other
    return NotImplemented

5473

def __or__(self, other) -> "ParserElement":
    """Combine with ``other`` via the inherited ``|``, warning about ``fwd << a | b``."""
    caller_line = traceback.extract_stack(limit=2)[-2]
    # '|' on the same stack entry as the preceding '<<' means the '<<'
    # already bound, leaving the alternatives out of the Forward
    looks_like_precedence_slip = (
        __diag__.warn_on_match_first_with_lshift_operator
        and caller_line == self.lshift_line
        and Diagnostics.warn_on_match_first_with_lshift_operator
        not in self.suppress_warnings_
    )
    if looks_like_precedence_slip:
        warnings.warn(
            "using '<<' operator with '|' is probably an error, use '<<='",
            stacklevel=2,
        )
    return super().__or__(other)

5488

def __del__(self):
    """Warn if this ``Forward`` is dropped without ever receiving an expression."""
    # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
    if self.expr is not None:
        return
    if not __diag__.warn_on_assignment_to_Forward:
        return
    if Diagnostics.warn_on_assignment_to_Forward in self.suppress_warnings_:
        return

    # attribute the warning to the place where the Forward was created
    origin = self.caller_frame
    warnings.warn_explicit(
        "Forward defined here but no expression attached later using '<<=' or '<<'",
        UserWarning,
        filename=origin.filename,
        lineno=origin.lineno,
    )

5502

def parseImpl(self, instring, loc, doActions=True):
    """Parse through the attached expression, with optional bounded left recursion."""
    if (
        self.expr is None
        and __diag__.warn_on_parse_using_empty_Forward
        and Diagnostics.warn_on_parse_using_empty_Forward
        not in self.suppress_warnings_
    ):
        # walk stack until parse_string, scan_string, search_string, or transform_string is found
        parse_fns = (
            "parse_string",
            "scan_string",
            "search_string",
            "transform_string",
        )
        tb = traceback.extract_stack(limit=200)
        # innermost entry first; fall back to 2 when no entry point is on the stack
        stacklevel = next(
            (
                depth + 1
                for depth, frm in enumerate(reversed(tb), start=1)
                if frm.name in parse_fns
            ),
            2,
        )
        warnings.warn(
            "Forward expression was never assigned a value, will not parse any input",
            stacklevel=stacklevel,
        )

    if not ParserElement._left_recursion_enabled:
        return super().parseImpl(instring, loc, doActions)

    # ## Bounded Recursion algorithm ##
    # Recursion only needs to be processed at ``Forward`` elements, since they are
    # the only ones that can actually refer to themselves. The general idea is
    # to handle recursion stepwise: We start at no recursion, then recurse once,
    # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
    #
    # The "trick" here is that each ``Forward`` gets evaluated in two contexts
    # - to *match* a specific recursion level, and
    # - to *search* the bounded recursion level
    # and the two run concurrently. The *search* must *match* each recursion level
    # to find the best possible match. This is handled by a memo table, which
    # provides the previous match to the next level match attempt.
    #
    # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
    #
    # There is a complication since we not only *parse* but also *transform* via
    # actions: We do not want to run the actions too often while expanding. Thus,
    # we expand using `doActions=False` and only run `doActions=True` if the next
    # recursion level is acceptable.
    with ParserElement.recursion_lock:
        memos = ParserElement.recursion_memos
        try:
            # we are parsing at a specific recursion expansion - use it as-is
            prev_loc, prev_result = memos[loc, self, doActions]
            if isinstance(prev_result, Exception):
                raise prev_result
            return prev_loc, prev_result.copy()
        except KeyError:
            key_with_actions = (loc, self, True)
            key_peek_only = (loc, self, False)
            # we are searching for the best recursion expansion - keep on improving
            # both `doActions` cases must be tracked separately here!
            # seed the memo with a failure located just before ``loc``
            prev_loc, prev_peek = memos[key_peek_only] = (
                loc - 1,
                ParseException(
                    instring, loc, "Forward recursion without base case", self
                ),
            )
            if doActions:
                memos[key_with_actions] = memos[key_peek_only]

            while True:
                try:
                    new_loc, new_peek = super().parseImpl(instring, loc, False)
                except ParseException:
                    # we failed before getting any match – do not hide the error
                    if isinstance(prev_peek, Exception):
                        raise
                    new_loc, new_peek = prev_loc, prev_peek

                if new_loc <= prev_loc:
                    # the match did not get better: we are done
                    if doActions:
                        # replace the match for doActions=False as well,
                        # in case the action did backtrack
                        prev_loc, prev_result = memos[key_peek_only] = memos[
                            key_with_actions
                        ]
                        del memos[key_peek_only], memos[key_with_actions]
                        return prev_loc, prev_result.copy()
                    del memos[key_peek_only]
                    return prev_loc, prev_peek.copy()

                # the match did get better: see if we can improve further
                if doActions:
                    try:
                        memos[key_with_actions] = super().parseImpl(
                            instring, loc, True
                        )
                    except ParseException as e:
                        memos[key_peek_only] = memos[key_with_actions] = (new_loc, e)
                        raise
                prev_loc, prev_peek = memos[key_peek_only] = new_loc, new_peek

5597

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """Disable leading-whitespace skipping on this element and return it.

    ``recursive`` is part of the signature but is not consulted here:
    only this element's own ``skipWhitespace`` flag is changed.
    """
    self.skipWhitespace = False
    return self

5601

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """Enable leading-whitespace skipping on this element and return it.

    ``recursive`` is part of the signature but is not consulted here:
    only this element's own ``skipWhitespace`` flag is changed.
    """
    self.skipWhitespace = True
    return self

5605

def streamline(self) -> ParserElement:
    """Streamline this element (at most once) and its contained expression.

    Returns ``self`` so calls can be chained.
    """
    if self.streamlined:
        return self

    # Mark first: a re-entrant call made while the contained expression is
    # being streamlined then returns immediately through the guard above.
    self.streamlined = True
    inner = self.expr
    if inner is not None:
        inner.streamline()
    return self

5612

def validate(self, validateTrace=None) -> None:
    """Deprecated recursion check.

    Always emits a ``DeprecationWarning``.  If this element is not already
    in ``validateTrace``, the contained expression (when set) is validated
    with this element appended to a copy of the trace.  Finally
    ``_checkRecursion([])`` is invoked on this element.
    """
    warnings.warn(
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
        DeprecationWarning,
        stacklevel=2,
    )
    trace = [] if validateTrace is None else validateTrace

    if self not in trace:
        # extend a copy so the caller's list is left untouched
        extended = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended)
    self._checkRecursion([])

5627

5628 def _generateDefaultName(self) -> str:

5629 # Avoid infinite recursion by setting a temporary _defaultName

5630 self._defaultName = ": ..."

5631

5632 # Use the string representation of main expression.

5633 retString = "..."

5634 try:

5635 if self.expr is not None:

5636 retString = str(self.expr)[:1000]

5637 else:

5638 retString = "None"

5639 finally:

5640 return self.__class__.__name__ + ": " + retString

5641

def copy(self) -> ParserElement:
    """Return a copy of this element.

    When a contained expression is set, copying is delegated to the base
    class.  Otherwise a new ``Forward`` is created and this element is
    assigned into it with ``<<=``.
    """
    if self.expr is None:
        placeholder = Forward()
        placeholder <<= self
        return placeholder
    return super().copy()

5649

def _setResultsName(self, name, list_all_matches=False):
    """Set a results name, warning first if no expression is contained yet.

    The warning is issued only when the ``warn_name_set_on_empty_Forward``
    diagnostic is enabled and not listed in this element's
    ``suppress_warnings_``.  The actual work is delegated to the base class.
    """
    warning_active = (
        __diag__.warn_name_set_on_empty_Forward
        and Diagnostics.warn_name_set_on_empty_Forward
        not in self.suppress_warnings_
    )
    if warning_active and self.expr is None:
        warnings.warn(
            "{}: setting results name {!r} on {} expression "
            "that has no contained expression".format(
                "warn_name_set_on_empty_Forward", name, type(self).__name__
            ),
            stacklevel=3,
        )

    return super()._setResultsName(name, list_all_matches)

5666

# Compatibility synonyms
# Pre-PEP8 camelCase names; each stub is handed to ``replaced_by_pep8``
# together with the snake_case method it stands in for.
# fmt: off
@replaced_by_pep8(leave_whitespace)
def leaveWhitespace(self): ...

@replaced_by_pep8(ignore_whitespace)
def ignoreWhitespace(self): ...
# fmt: on

5675

5676

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted in the signature but is not forwarded to
        # the base initializer.
        super().__init__(expr)
        self.saveAsList = False

5685

5686

class Combine(TokenConverter):
    """Converter that concatenates all matched tokens into a single string.

    By default the matched pieces must also be contiguous in the input
    string; pass ``adjacent=False`` to the constructor to relax this.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the legacy keyword ``joinString`` wins over ``join_string`` when given
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but
        # re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as ignorable and return ``self``.

        With ``adjacent`` set, ``ParserElement.ignore`` is called directly;
        otherwise the next ``ignore`` in the inheritance chain is used.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one joined string token."""
        # start from a copy of the results and empty its token list
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

5742

5743

class Group(TokenConverter):
    """Converter that returns the matched tokens as a nested list - handy
    for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    When the optional ``aslist`` argument is True, the tokens are returned
    as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(DelimitedList(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(DelimitedList(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Wrap the tokens in a list, or convert them via ``ParseResults.List``."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            items = tokenlist.asList()
        else:
            items = list(tokenlist)
        return ParseResults.List(items)

5779

5780

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also
    as a dictionary. Each element can be looked up using the first token
    of that element as its key. Useful for tabular report scraping when
    the first column can be used as an item key.

    When the optional ``asdict`` argument is True, the parsed tokens are
    returned as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty entry of ``tokenlist`` under its first token.

        Raises ``TypeError`` if an entry that needs its remaining tokens
        extracted cannot be copied.
        """
        for position, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their string form
                key = str(key).strip()

            if size == 1:
                # key with nothing after it
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                # key followed by one plain token
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                # keep the remainder whole unless it is a single unnamed token
                keep_whole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keep_whole else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        converted = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [converted] if self.resultsName else converted

5866

5867

class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` (a literal Ellipsis) becomes a pending skip that
        # is resolved once the following expression is known; ``savelist``
        # is accepted but not used here.
        super().__init__(_PendingSkip(NoMatch()) if expr is ... else expr)

    def __add__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        # resolve the pending skip: suppress everything up to ``other``
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        # resolve the pending skip: suppress everything up to ``other``
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Return ``self`` unchanged rather than wrapping it again."""
        return self

5921

5922

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    to ``sys.stderr``. When the parse action completes, it writes
    ``"<<leaving method-name"`` followed by the returned value, or by the
    exception that the parse action raised (which is then re-raised).

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is reported by its class name
            label = f"{paArgs[0].__class__.__name__}.{label}"
        sys.stderr.write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(f"<<leaving {label} (exception: {exc})\n")
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z

5966

5967

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Grammar for regex-style ``[...]`` character sets (see ``_reBracketExpr``).

# backslash-escaped punctuation: keep only the escaped character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# ``\x##`` / ``\0x##`` hex escapes.  Only the matched prefix is removed before
# conversion: ``str.lstrip(r"\0x")`` strips a *set* of characters, so it
# turned ``\x00`` into an empty string and left the ``X`` of ``\X41`` in
# place, and ``int(..., 16)`` then raised in both cases.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(re.sub(r"\\0?[xX]", "", t[0], count=1), 16))
)
# ``\0##`` octal escapes: drop the backslash and parse the rest as base 8
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# any one set member: an escape, or a single character other than ``\`` / ``]``
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range, grouped as [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

5994

5995

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    Any exception raised while parsing or expanding ``s`` is swallowed
    and an empty string is returned instead.
    """

    def _expand(part):
        # a ParseResults part is treated as a [first, last] range; anything
        # else is passed through as-is
        if isinstance(part, ParseResults):
            return "".join(chr(c) for c in range(ord(part[0]), ord(part[1]) + 1))
        return part

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(map(_expand, body))
    except Exception:
        return ""

6031

6032

6033def token_map(func, *args) -> ParseAction:

6034 """Helper to define a parse action by mapping a function to all

6035 elements of a :class:`ParseResults` list. If any additional args are passed,

6036 they are forwarded to the given function as additional arguments

6037 after the token, as in

6038 ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,

6039 which will convert the parsed data to an integer using base 16.

6040

6041 Example (compare the last to example in :class:`ParserElement.transform_string`::

6042

6043 hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))

6044 hex_ints.run_tests('''

6045 00 11 22 aa FF 0a 0d 1a

6046 ''')

6047

6048 upperword = Word(alphas).set_parse_action(token_map(str.upper))

6049 upperword[1, ...].run_tests('''

6050 my kingdom for a horse

6051 ''')

6052

6053 wd = Word(alphas).set_parse_action(token_map(str.title))

6054 wd[1, ...].set_parse_action(' '.join).run_tests('''

6055 now is the winter of our discontent made glorious summer by this sun of york

6056 ''')

6057

6058 prints::

6059

6060 00 11 22 aa FF 0a 0d 1a

6061 [0, 17, 34, 170, 255, 10, 13, 26]

6062

6063 my kingdom for a horse

6064 ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

6065

6066 now is the winter of our discontent made glorious summer by this sun of york

6067 ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']

6068 """

6069

6070 def pa(s, l, t):

6071 return [func(tokn, *args) for tokn in t]

6072

6073 func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)

6074 pa.__name__ = func_name

6075

6076 return pa

6077

6078

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Every :class:`ParserElement` found among the caller's local variables
    that has no ``customName`` yet is named after its variable.
    """
    caller = sys._getframe().f_back
    if caller is None:
        # no calling frame to inspect
        return
    caller = typing.cast(types.FrameType, caller)
    for var_name, value in caller.f_locals.items():
        if not isinstance(value, ParserElement) or value.customName:
            continue
        value.set_name(var_name)

6091

6092

# Quoted-string expressions.  Each one is a Combine of an opening-quote-plus-
# body Regex followed by the closing quote literal.

# "..." with "" or backslash escapes for embedded quotes
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# '...' with '' or backslash escapes for embedded quotes
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either of the two forms above
quoted_string = Combine(
    (
        Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    ).set_name("double quoted string")
    | (
        Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
    ).set_name("single quoted string")
).set_name("quoted string using single or double quotes")

# Python-style literals: triple-quoted forms plus single-line forms that
# escape the quote character with a backslash
python_quoted_string = Combine(
    (
        Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""'
    ).set_name("multiline double quoted string")
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (
        Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    ).set_name("double quoted string")
    ^ (
        Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
    ).set_name("single quoted string")
).set_name("Python quoted string")

# a quoted string (copied from ``quoted_string``) prefixed with ``u``
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")

6126

6127

# character sets built with srange() from hex-escaped ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshot of every ParserElement bound in this namespace at this point)
_builtin_exprs: List[ParserElement] = [
    expr for expr in vars().values() if isinstance(expr, ParserElement)
]

6136

# backward compatibility names
# Pre-PEP8 camelCase aliases: plain rebindings for the expression objects,
# and ``replaced_by_pep8`` stubs for the functions.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end

@replaced_by_pep8(null_debug_action)
def nullDebugAction(): ...

@replaced_by_pep8(trace_parse_action)
def traceParseAction(): ...

@replaced_by_pep8(condition_as_parse_action)
def conditionAsParseAction(): ...

@replaced_by_pep8(token_map)
def tokenMap(): ...
# fmt: on