Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 45%

2# core.py

4from __future__ import annotations

6import collections.abc

7from collections import deque

8import os

9import typing

10from typing import (

11 Any,

12 Callable,

13 Generator,

14 NamedTuple,

15 Sequence,

16 TextIO,

17 Union,

18 cast,

19)

20from abc import ABC, abstractmethod

21from enum import Enum

22import string

23import copy

24import warnings

25import re

26import sys

27from collections.abc import Iterable

28import traceback

29import types

30from operator import itemgetter

31from functools import wraps

32from threading import RLock

33from pathlib import Path

35from .util import (

36 _FifoCache,

37 _UnboundedCache,

38 __config_flags,

39 _collapse_string_to_ranges,

40 _escape_regex_range_chars,

41 _flatten,

42 LRUMemo as _LRUMemo,

43 UnboundedMemo as _UnboundedMemo,

44 deprecate_argument,

45 replaced_by_pep8,

46)

47from .exceptions import *

48from .actions import *

49from .results import ParseResults, _ParseResultsWithOffset

50from .unicode import pyparsing_unicode

# Largest native index value on this platform (``sys.maxsize``).
_MAX_INT = sys.maxsize
# The types grouped together as "string-like": text and bytes.
str_type: tuple[type, ...] = (str, bytes)

55#

57#

58# Permission is hereby granted, free of charge, to any person obtaining

59# a copy of this software and associated documentation files (the

60# "Software"), to deal in the Software without restriction, including

61# without limitation the rights to use, copy, modify, merge, publish,

62# distribute, sublicense, and/or sell copies of the Software, and to

63# permit persons to whom the Software is furnished to do so, subject to

64# the following conditions:

65#

66# The above copyright notice and this permission notice shall be

67# included in all copies or substantial portions of the Software.

68#

69# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

70# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

71# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

72# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

73# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

74# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

75# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

76#

78from functools import cached_property

class __compat__(__config_flags):
    """
    Compatibility switches that let features planned for a future pyparsing
    release be turned on early, so that code can be developed and tested
    against them ahead of time.

    - ``collect_all_And_tokens`` - enables the fix for Issue #63 (erroneous
      grouping of results names when an :class:`And` expression is nested
      inside an :class:`Or` or :class:`MatchFirst`); kept only for
      compatibility, because setting it to ``False`` no longer restores the
      pre-2.3.1 behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public name defined in the class body up to this point is a flag
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]

102

103

class __diag__(__config_flags):
    # Holder for the global diagnostic flags; every flag starts out False.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # flag names are collected from the class body itself, then split by prefix
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        # switch on each flag whose name begins with "warn"
        for warning_name in cls._warning_names:
            cls.enable(warning_name)

124

125

class Diagnostics(Enum):
    """
    Names of the global diagnostic flags (every diagnostic is disabled by default).

    - ``warn_multiple_tokens_in_named_alternation`` - warn when a results name is
      defined on a :class:`MatchFirst` or :class:`Or` expression that has one or
      more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - warn when a results name is
      defined on a containing expression whose ungrouped subexpressions also have
      results names
    - ``warn_name_set_on_empty_Forward`` - warn when a :class:`Forward` is given a
      results name but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - warn when a :class:`Forward` is used
      in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - warn when a :class:`Forward` is defined
      but then overwritten by assigning with ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - warn when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - NOTE(review): not described in
      the original notes; judging by the name it concerns combining ``<<`` with a
      :class:`MatchFirst` expression - confirm before relying on this
    - ``enable_debug_on_named_expressions`` - automatically enable debug on all
      subsequent calls to :class:`ParserElement.set_name`

    Individual diagnostics are switched on and off with :func:`enable_diag` and
    :func:`disable_diag`; :func:`enable_all_warnings` switches on all warnings.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

158

159

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag (see :class:`Diagnostics`).

    :param diag_enum: the :class:`Diagnostics` member whose name identifies the flag
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

165

166

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag (see :class:`Diagnostics`).

    :param diag_enum: the :class:`Diagnostics` member whose name identifies the flag
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

172

173

def enable_all_warnings() -> None:
    """
    Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).

    Module-level convenience wrapper that delegates to the ``enable_all_warnings``
    classmethod of the internal ``__diag__`` flag holder.
    """
    __diag__.enable_all_warnings()

179

180

# hide abstract class: __compat__ and __diag__ have already been created from it,
# so the base-class name is removed from this module's namespace
del __config_flags

183

184

185def _should_enable_warnings(

186 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]

187) -> bool:

188 enable = bool(warn_env_var)

189 for warn_opt in cmd_line_warn_options:

190 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split(

191 ":"

192 )[:5]

193 if not w_action.lower().startswith("i") and (

194 not (w_message or w_category or w_module) or w_module == "pyparsing"

195 ):

196 enable = True

197 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""):

198 enable = False

199 return enable

200

201

# Import-time side effect: switch on every diagnostic warning when the
# interpreter's warning options (sys.warnoptions) or the
# PYPARSINGENABLEALLWARNINGS environment variable call for it.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

206

207

208# build list of single arg builtins, that can be used as parse actions

209# fmt: off

210_single_arg_builtins = {

211 sum, len, sorted, reversed, list, tuple, set, any, all, min, max

212}

213# fmt: on

214

# the type of generator objects
_generatorType = types.GeneratorType
# (location, tokens) pair, as returned by ParserElement.parseImpl
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# A condition returns a bool and may take none, one, two or all three of the
# arguments: str, int, ParseResults
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# Callback signatures; none of them returns a value.
# fail action:          (instring, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug "try" action:   (instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# debug "match" action: (instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# debug "fail" action:  (instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

231

232

# ASCII letters, upper case first and then lower case
alphas: str = string.ascii_uppercase + string.ascii_lowercase
# identifier start/body characters, taken from the Latin1 unicode set
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
# decimal digits
nums: str = "0123456789"
# decimal digits followed by the hex letters in both cases
hexnums: str = nums + "ABCDEFabcdef"
# letters followed by digits
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])

240

241

242class _ParseActionIndexError(Exception):

243 """

244 Internal wrapper around IndexError so that IndexErrors raised inside

245 parse actions aren't misinterpreted as IndexErrors raised inside

246 ParserElement parseImpl methods.

247 """

248

249 def __init__(self, msg: str, exc: BaseException) -> None:

250 self.msg: str = msg

251 self.exc: BaseException = exc

252

253

# Module-level caches used by _trim_arity(): both start out empty/falsy and are
# filled in the first time _trim_arity() runs (it assigns ``x = x or ...``).
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()

256

257

def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is called with the full argument list and passes on
    only ``args[limit:]`` to ``func``.  ``limit`` starts at 0 and is raised by
    one (up to ``max_limit``) each time the call fails with a TypeError whose
    innermost extracted frame is the wrapper's own call line, i.e. the call
    itself was rejected.  Once a call succeeds, the discovered ``limit`` is
    reused for all later calls.

    Members of ``_single_arg_builtins`` are special-cased and are simply called
    with the third argument.

    An IndexError raised by ``func`` is re-raised wrapped in a
    ``_ParseActionIndexError``.
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments to drop, and whether that number is confirmed
    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    # callables without a __name__ fall back to the name of their class
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

320

321

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a simple predicate (a function returning ``True`` or ``False``) so that
    it can be used as a parse action.  Useful wherever a parse action is
    required and :meth:`ParserElement.add_condition` cannot be used (such as
    when adding a condition to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    :param message: custom message for the raised exception; when ``None`` the
                    message is ``"failed user-defined condition"``
    :param fatal: if ``True``, raise :class:`ParseFatalException` to stop
                  parsing immediately; otherwise raise :class:`ParseException`

    :return: a parse action taking ``(s, l, t)`` that raises when the predicate
             result is falsy and otherwise returns ``None``
    """
    if message is None:
        failure_message = "failed user-defined condition"
    else:
        failure_message = message

    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException

    # normalize the predicate so it can always be called with (s, l, t)
    predicate = _trim_arity(fn)

    @wraps(predicate)
    def pa(s, l, t):
        if bool(predicate(s, l, t)):
            return
        raise failure_type(s, l, failure_message)

    return pa

349

350

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default debug action run when a match of ``expr`` is about to be attempted.

    Prints the expression with the location (including line number and column),
    then the current line of input, then a ``^`` marker right-aligned to the
    column.  The output is prefixed with ``*`` when ``cache_hit`` is true.
    """
    # NOTE(review): this source was whitespace-collapsed during extraction;
    # verify the number of spaces inside the literals below against upstream.
    if cache_hit:
        marker = "*"
    else:
        marker = ""
    report = (
        f"{marker}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
        f" {line(loc, instring)}\n"
        f" {'^':>{col(loc, instring)}}"
    )
    print(report)

362

363

364def _default_success_debug_action(

365 instring: str,

366 startloc: int,

367 endloc: int,

368 expr: ParserElement,

369 toks: ParseResults,

370 cache_hit: bool = False,

371):

372 cache_hit_str = "*" if cache_hit else ""

373 print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}")

374

375

376def _default_exception_debug_action(

377 instring: str,

378 loc: int,

379 expr: ParserElement,

380 exc: Exception,

381 cache_hit: bool = False,

382):

383 cache_hit_str = "*" if cache_hit else ""

384 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}")

385

386

def null_debug_action(*args):
    """Debug action that accepts any arguments and does nothing; use it to silence debugging output while parsing."""
    return None

389

390

391class ParserElement(ABC):

392 """Abstract base level parser element class."""

393

394 DEFAULT_WHITE_CHARS: str = " \n\t\r"

395 verbose_stacktrace: bool = False

396 _literalStringClass: type = None # type: ignore[assignment]

397

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the default set of whitespace characters.

    The new value is stored on :class:`ParserElement` and is also applied to
    every expression built into this module that still follows the default.

    Example:

    .. doctest::

        # default whitespace chars are space, <TAB> and newline
        >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
        ParseResults(['abc', 'def', 'ghi', 'jkl'], {})

        # change to just treat newline as significant
        >>> ParserElement.set_default_whitespace_chars(" \t")
        >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")
        ParseResults(['abc', 'def'], {})

        # Reset to default
        >>> ParserElement.set_default_whitespace_chars(" \n\t\r")
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # refresh the module's own expressions that track the default whitespace
    tracking_default = (e for e in _builtin_exprs if e.copyDefaultWhiteChars)
    for builtin_expr in tracking_default:
        builtin_expr.whiteChars = set(chars)

425

@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Set class to be used for inclusion of string literals into a parser.

    The class is stored in ``ParserElement._literalStringClass``.

    :param cls: the class to use for string literals

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        # default literal class used is Literal
        >>> integer = Word(nums)
        >>> date_str = (
        ...     integer("year") + '/'
        ...     + integer("month") + '/'
        ...     + integer("day")
        ... )

        >>> date_str.parse_string("1999/12/31")
        ParseResults(['1999', '/', '12', '/', '31'],
        {'year': '1999', 'month': '12', 'day': '31'})

        # change to Suppress
        >>> ParserElement.inline_literals_using(Suppress)
        >>> date_str = (
        ...     integer("year") + '/'
        ...     + integer("month") + '/'
        ...     + integer("day")
        ... )

        >>> date_str.parse_string("1999/12/31")
        ParseResults(['1999', '12', '31'],
        {'year': '1999', 'month': '12', 'day': '31'})

        # Reset
        >>> ParserElement.inline_literals_using(Literal)
    """
    ParserElement._literalStringClass = cls

464

@classmethod
def using_each(cls, seq, **class_kwargs):
    """
    Lazily produce ``cls(obj, **class_kwargs)`` for each ``obj`` in ``seq``.

    Example:

    .. testcode::

        LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

    .. versionadded:: 3.1.0
    """
    for obj in seq:
        yield cls(obj, **class_kwargs)

479

class DebugActions(NamedTuple):
    # The three optional debug callbacks of an expression; a field may be None.
    # called with (instring, loc, expr, cache_hit)
    debug_try: typing.Optional[DebugStartAction]
    # called with (instring, startloc, endloc, expr, toks, cache_hit)
    debug_match: typing.Optional[DebugSuccessAction]
    # called with (instring, loc, expr, exc, cache_hit)
    debug_fail: typing.Optional[DebugExceptionAction]

484

def __init__(self, savelist: bool = False) -> None:
    """
    Give every per-instance attribute its starting value.

    :param savelist: stored as ``saveAsList``
    """
    # --- callbacks run on success / failure
    self.parseAction: list[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None
    # --- naming of the expression and of its results
    self.customName: str = None  # type: ignore[assignment]
    self._defaultName: typing.Optional[str] = None
    self.resultsName: str = None  # type: ignore[assignment]
    self.saveAsList: bool = savelist
    # --- whitespace handling (starts from the class-wide default characters)
    self.skipWhitespace: bool = True
    self.whiteChars: set[str] = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars: bool = True
    # consulted when checking for left-recursion
    self._may_return_empty: bool = False
    self.keepTabs: bool = False
    self.ignoreExprs: list[ParserElement] = []
    self.debug: bool = False
    self.streamlined: bool = False
    # lets subclasses that don't advance the parse index skip some exception handling
    self.mayIndexError: bool = True
    self.errmsg: Union[str, None] = ""
    # results names are modal (report only the last) or cumulative (list all)
    self.modalResults: bool = True
    # no custom debug actions to begin with
    self.debugActions = self.DebugActions(
        debug_try=None, debug_match=None, debug_fail=None
    )
    # allows redundant calls to preParse to be avoided
    self.callPreparse: bool = True
    self.callDuringTry: bool = False
    self.suppress_warnings_: list[Diagnostics] = []
    self.show_in_diagram: bool = True

513

@property
def mayReturnEmpty(self) -> bool:
    """
    Read-through alias for the ``_may_return_empty`` attribute.

    .. deprecated:: 3.3.0
        use _may_return_empty instead.
    """
    return self._may_return_empty

@mayReturnEmpty.setter
def mayReturnEmpty(self, value: bool) -> None:
    """
    Write-through alias for the ``_may_return_empty`` attribute.

    .. deprecated:: 3.3.0
        use _may_return_empty instead.
    """
    self._may_return_empty = value

529

def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:
    """
    Record that a particular diagnostic warning is not wanted for this expression.

    :param warning_type: the :class:`Diagnostics` member to suppress
    :return: this expression, so that calls can be chained

    Example:

    .. doctest::

        >>> label = pp.Word(pp.alphas)

        # Normally using an empty Forward in a grammar
        # would print a warning, but we can suppress that
        >>> base = pp.Forward().suppress_warning(
        ...     pp.Diagnostics.warn_on_parse_using_empty_Forward)

        >>> grammar = base | label
        >>> print(grammar.parse_string("x"))
        ['x']
    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self

551

def visit_all(self):
    """Yield this expression and every sub-expression reachable from it,
    each one exactly once, in breadth-first order. Mostly intended for
    internal use.
    """
    pending = deque((self,))
    visited = set()
    while pending:
        expr = pending.popleft()
        # recursive grammars lead back to expressions already handled
        if expr not in visited:
            visited.add(expr)
            pending.extend(expr.recurse())
            yield expr

568

def copy(self) -> ParserElement:
    """
    Return a copy of this :class:`ParserElement`.

    The copy gets its own lists of parse actions and ignore expressions, so
    different parse actions can be attached to copies of one parsing pattern.
    If this expression follows the default whitespace characters, the copy is
    given the current default.

    Example:

    .. testcode::

        integer = Word(nums).set_parse_action(
            lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(
            lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(
            lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print(
            (integerK | integerM | integer)[1, ...].parse_string(
                "5K 100 640K 256M")
        )

    prints:

    .. testoutput::

        [5120, 100, 655360, 268435456]

    ``expr()`` is an equivalent, shorter spelling of ``expr.copy()``:

    .. testcode::

        integerM = integer().add_parse_action(
            lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # the shallow copy shares these lists with the original, so replace them
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate

610

def set_results_name(
    self, name: str, list_all_matches: bool = False, **kwargs
) -> ParserElement:
    """
    Give the matched tokens a name under which they can be referenced as a
    nested attribute of the returned parse results.

    Results names normally behave like dict keys: a later value overwrites an
    earlier one. To keep every value captured under a results name, call
    ``set_results_name`` with ``list_all_matches`` = True.

    NOTE: ``set_results_name`` returns a *copy* of the original
    :class:`ParserElement` object, so that a basic element (an integer, say)
    can be defined once and then referenced in several places under different
    names.

    The abbreviated syntax ``expr("name")`` can be used in place of
    ``expr.set_results_name("name")`` - see :meth:`__call__`. When
    ``list_all_matches`` is needed, use ``expr("name*")``.

    The deprecated keyword ``listAllMatches`` is still accepted.

    Example:

    .. testcode::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    listAllMatches: bool = deprecate_argument(kwargs, "listAllMatches", False)

    # either spelling of the argument may ask for all matches to be kept
    keep_all = listAllMatches or list_all_matches
    return self._setResultsName(name, keep_all)

648

649 def _setResultsName(self, name, list_all_matches=False) -> ParserElement:

650 if name is None:

651 return self

652 newself = self.copy()

653 if name.endswith("*"):

654 name = name[:-1]

655 list_all_matches = True

656 newself.resultsName = name

657 newself.modalResults = not list_all_matches

658 return newself

659

def set_break(self, break_flag: bool = True) -> ParserElement:
    """
    Method to invoke the Python pdb debugger when this element is
    about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
    disable.

    Enabling replaces ``self._parse`` with a wrapper that calls
    ``breakpoint()`` and then the wrapped parse method; the wrapped method is
    kept on the wrapper as ``_originalParseMethod`` so disabling can restore it.
    Enabling an element that is already enabled does nothing, so one call
    with ``False`` always removes the breakpoint.

    :param break_flag: ``True`` to install the breakpoint, ``False`` to remove it
    :return: this expression, so that calls can be chained
    """
    already_enabled = hasattr(self._parse, "_originalParseMethod")

    if break_flag:
        if already_enabled:
            # wrapping again would stack breakpoints, and a single disable
            # would then leave one of them in place
            return self

        _parseMethod = self._parse

        def breaker(instring, loc, do_actions=True, callPreParse=True):
            # this call to breakpoint() is intentional, not a checkin error
            breakpoint()
            return _parseMethod(instring, loc, do_actions, callPreParse)

        breaker._originalParseMethod = _parseMethod  # type: ignore [attr-defined]
        self._parse = breaker  # type: ignore [method-assign]
    elif already_enabled:
        self._parse = self._parse._originalParseMethod  # type: ignore [method-assign]
    return self

679

def set_parse_action(
    self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
) -> ParserElement:
    """
    Replace this expression's parse actions with one or more actions to run
    when the parse element definition is matched successfully.

    Parse actions can perform data conversions, do extra validation, update
    external data structures, or enhance or replace the parsed tokens.
    Each parse action ``fn`` is a callable with 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - ``s`` = the original string being parsed (see note below)
    - ``loc`` = the location of the matching substring
    - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens arrive as ParseResults and may be modified in place: list-style
    append, extend, and pop operations update the parsed list elements, and
    dictionary-style item set and del operations add, update, or remove named
    results. Tokens modified in place do not need to be returned.

    A parse action may also replace the given tokens completely, with another
    ``ParseResults`` object or with some entirely different object (common for
    parse actions that perform data conversions). A convenient way to build a
    new parse result is to define the values using a dict, and then create the
    return value using :class:`ParseResults.from_dict`.

    Passing None as the ``fn`` parse action clears all parse actions previously
    added to this expression.

    :raises TypeError: if any of the given actions is not callable

    Optional keyword arguments:

    :param call_during_try: (default= ``False``) indicate if parse action
                            should be run during lookaheads and alternate
                            testing. For parse actions that have side
                            effects, it is important to only call the parse
                            action once it is determined that it is being
                            called as part of a successful parse.
                            For parse actions that perform additional
                            validation, then ``call_during_try`` should
                            be passed as True, so that the validation code
                            is included in the preliminary "try" parses.

    .. Note::
        The default parsing behavior is to expand tabs in the input string
        before starting the parsing process.
        See :meth:`parse_string` for more information on parsing strings
        containing ``<TAB>`` s, and suggested methods to maintain a
        consistent view of the parsed string, the parse location, and
        line and column positions within the parsed string.

    Example: Parse dates in the form ``YYYY/MM/DD``
    -----------------------------------------------

    Setup code:

    .. testcode::

        def convert_to_int(toks):
            '''a parse action to convert toks from str to int
            at parse time'''
            return int(toks[0])

        def is_valid_date(instring, loc, toks):
            '''a parse action to verify that the date is a valid date'''
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

    Successful parse - note that integer fields are converted to ints:

    .. testcode::

        print(date_str.parse_string("1999/12/31"))

    prints:

    .. testoutput::

        [1999, '/', 12, '/', 31]

    Failure - invalid date:

    .. testcode::

        date_str.parse_string("1999/13/31")

    prints:

    .. testoutput::

        Traceback (most recent call last):
        ParseException: invalid date given, found '1999' ...
    """
    callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

    # a lone None is the signal to remove all existing parse actions
    if fns == (None,):
        self.parseAction.clear()
        return self

    if any(not callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")

    # replace the contents in place, keeping the same list object
    self.parseAction[:] = list(map(_trim_arity, fns))
    self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry

    return self

795

def add_parse_action(
    self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any
) -> ParserElement:
    """
    Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.

    See examples in :class:`copy`.

    :param fns: the parse actions to append, in order
    :param call_during_try: if true, parse actions are also run during
                            lookaheads and alternate testing; once set it
                            stays set
    :return: this expression, so that calls can be chained
    :raises TypeError: if any of the given actions is not callable
    """
    # Reject bad arguments up front, as set_parse_action does, instead of
    # letting them fail later, at parse time.
    if not all(callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")

    callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

    self.parseAction += [_trim_arity(fn) for fn in fns]
    self.callDuringTry = self.callDuringTry or callDuringTry or call_during_try
    return self

809

def add_condition(
    self, *fns: ParseCondition, call_during_try: bool = False, **kwargs: Any
) -> ParserElement:
    """Add a boolean predicate function to expression's list of parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:

    - ``message`` = define a custom message to be used in the raised exception; when it is
      omitted (or ``None``), the default message of :func:`condition_as_parse_action` is used
    - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
      ParseException
    - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,
      default=False

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        >>> year_int = integer.copy().add_condition(
        ...     lambda toks: toks[0] >= 2000,
        ...     message="Only support years 2000 and later")
        >>> date_str = year_int + '/' + integer + '/' + integer

        >>> result = date_str.parse_string("1999/12/31")
        Traceback (most recent call last):
        ParseException: Only support years 2000 and later...
    """
    callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

    # Only convert a message that was actually given: str(None) would yield
    # the text "None" and hide the default failure message.
    message = kwargs.get("message")
    if message is not None:
        message = str(message)
    fatal = bool(kwargs.get("fatal", False))

    for fn in fns:
        self.parseAction.append(
            condition_as_parse_action(fn, message=message, fatal=fatal)
        )

    self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry
    return self

853

def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
    """
    Define action to perform if parsing fails at this expression.
    Fail action fn is a callable function that takes the arguments
    ``fn(s, loc, expr, err)`` where:

    - ``s`` = string being parsed
    - ``loc`` = location where expression match was attempted and failed
    - ``expr`` = the parse expression that failed
    - ``err`` = the exception thrown

    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately.

    Any previously set fail action is replaced. Returns this expression, so
    that calls can be chained.
    """
    self.failAction = fn
    return self

869

870 def _skipIgnorables(self, instring: str, loc: int) -> int:

871 if not self.ignoreExprs:

872 return loc

873 exprsFound = True

874 ignore_expr_fns = [e._parse for e in self.ignoreExprs]

875 last_loc = loc

876 while exprsFound:

877 exprsFound = False

878 for ignore_fn in ignore_expr_fns:

879 try:

880 while 1:

881 loc, dummy = ignore_fn(instring, loc)

882 exprsFound = True

883 except ParseException:

884 pass

885 # check if all ignore exprs matched but didn't actually advance the parse location

886 if loc == last_loc:

887 break

888 last_loc = loc

889 return loc

890

def preParse(self, instring: str, loc: int) -> int:
    """
    Return the location at which matching should start: ``loc`` moved past
    any ignorable expressions and then, if whitespace skipping is on, past
    any characters found in ``whiteChars``.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc

902

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Base implementation: matches nothing, returning ``loc`` unchanged together with a new empty token list."""
    no_tokens: list = []
    return loc, no_tokens

905

def postParse(self, instring, loc, tokenlist):
    """Base implementation: return ``tokenlist`` unchanged (``instring`` and ``loc`` are not used)."""
    return tokenlist

908

909 # @profile

910 def _parseNoCache(

911 self, instring, loc, do_actions=True, callPreParse=True

912 ) -> tuple[int, ParseResults]:

913 debugging = self.debug # and do_actions)

914 len_instring = len(instring)

915

916 if debugging or self.failAction:

917 # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))

918 try:

919 if callPreParse and self.callPreparse:

920 pre_loc = self.preParse(instring, loc)

921 else:

922 pre_loc = loc

923 tokens_start = pre_loc

924 if self.debugActions.debug_try:

925 self.debugActions.debug_try(instring, tokens_start, self, False)

926 if self.mayIndexError or pre_loc >= len_instring:

927 try:

928 loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

929 except IndexError:

930 raise ParseException(instring, len_instring, self.errmsg, self)

931 else:

932 loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

933 except Exception as err:

934 # print("Exception raised:", err)

935 if self.debugActions.debug_fail:

936 self.debugActions.debug_fail(

937 instring, tokens_start, self, err, False

938 )

939 if self.failAction:

940 self.failAction(instring, tokens_start, self, err)

941 raise

942 else:

943 if callPreParse and self.callPreparse:

944 pre_loc = self.preParse(instring, loc)

945 else:

946 pre_loc = loc

947 tokens_start = pre_loc

948 if self.mayIndexError or pre_loc >= len_instring:

949 try:

950 loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

951 except IndexError:

952 raise ParseException(instring, len_instring, self.errmsg, self)

953 else:

954 loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

955

956 tokens = self.postParse(instring, loc, tokens)

957

958 ret_tokens = ParseResults(

959 tokens, self.resultsName, aslist=self.saveAsList, modal=self.modalResults

960 )

961 if self.parseAction and (do_actions or self.callDuringTry):

962 if debugging:

963 try:

964 for fn in self.parseAction:

965 try:

966 tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type]

967 except IndexError as parse_action_exc:

968 exc = ParseException("exception raised in parse action")

969 raise exc from parse_action_exc

970

971 if tokens is not None and tokens is not ret_tokens:

972 ret_tokens = ParseResults(

973 tokens,

974 self.resultsName,

975 aslist=self.saveAsList

976 and isinstance(tokens, (ParseResults, list)),

977 modal=self.modalResults,

978 )

979 except Exception as err:

980 # print "Exception raised in user parse action:", err

981 if self.debugActions.debug_fail:

982 self.debugActions.debug_fail(

983 instring, tokens_start, self, err, False

984 )

985 raise

986 else:

987 for fn in self.parseAction:

988 try:

989 tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type]

990 except IndexError as parse_action_exc:

991 exc = ParseException("exception raised in parse action")

992 raise exc from parse_action_exc

993

994 if tokens is not None and tokens is not ret_tokens:

995 ret_tokens = ParseResults(

996 tokens,

997 self.resultsName,

998 aslist=self.saveAsList

999 and isinstance(tokens, (ParseResults, list)),

1000 modal=self.modalResults,

1001 )

1002 if debugging:

1003 # print("Matched", self, "->", ret_tokens.as_list())

1004 if self.debugActions.debug_match:

1005 self.debugActions.debug_match(

1006 instring, tokens_start, loc, self, ret_tokens, False

1007 )

1008

1009 return loc, ret_tokens

1010

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and report only where it would end.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param raise_fatal: if true, let a :class:`ParseFatalException`
        propagate; otherwise it is replaced by a :class:`ParseException`
        built from ``self.errmsg`` at ``loc``
    :param do_actions: passed through to ``self._parse``
    :returns: the location following the match
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
    except ParseFatalException:
        if raise_fatal:
            raise
        raise ParseException(instring, loc, self.errmsg, self)
    else:
        return outcome[0]

1025

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param do_actions: passed through to :meth:`try_parse`
    :returns: ``False`` if :meth:`try_parse` raises
        :class:`ParseException` or ``IndexError``, ``True`` otherwise
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True

1033

1034 # memo table (and its lock) for left-recursion in Forward references; emptied by reset_cache() and replaced by enable_left_recursion()

1035 recursion_lock = RLock()

1036 recursion_memos: collections.abc.MutableMapping[

1037 tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]

1038 ] = {}

1039

1040 class _CacheType(typing.Protocol):

1041 """

1042 Class to be used for packrat and left-recursion cacheing of results

1043 and exceptions.

1044 """

1045

1046 not_in_cache: bool

1047

1048 def get(self, *args) -> typing.Any: ...

1049

1050 def set(self, *args) -> None: ...

1051

1052 def clear(self) -> None: ...

1053

class NullCache(dict):
    """
    Do-nothing cache used to initialize the ``packrat_cache`` class
    variable. If/when ``enable_packrat()`` is called, this placeholder is
    replaced by a proper ``_CacheType`` instance.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        """Ignore the arguments; nothing is ever looked up."""
        return None

    def set(self, *args) -> None:
        """Ignore the arguments; nothing is ever stored."""
        return None

    def clear(self) -> None:
        """Do nothing (the inherited ``dict.clear`` is not called)."""
        return None

1068

1069 # class-level argument cache for optimizing repeated calls when backtracking

1070 # through recursive expressions; all three attributes are shared by every parser element

1071 packrat_cache: _CacheType = NullCache()  # placeholder until enable_packrat() installs a real cache

1072 packrat_cache_lock = RLock()  # held while the cache, its stats, or the memoization settings are read or changed

1073 packrat_cache_stats = [0, 0]  # [hits, misses] - indexed by HIT/MISS in _parseCache

1074

1075 # this method gets repeatedly called during backtracking with the same arguments -

1076 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Packrat variant of ``_parseNoCache``: memoize the outcome of matching
    this expression with these exact arguments.

    On a cache miss the expression is parsed with ``_parseNoCache`` and
    either the results (with the starting ``loc``) or a traceback-free
    copy of the raised :class:`ParseBaseException` is stored. On a hit the
    stored exception is re-raised, or a copy of the stored results is
    returned. Debug hooks, when enabled, are called with
    ``cache_hit=True`` on hits; hooks that do not accept that keyword are
    tolerated.

    :returns: tuple of the location following the match and the results
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                fresh = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(key, pe.__class__(*pe.args))
                raise
            cache.set(key, (fresh[0], fresh[1].copy(), loc))
            return fresh

        # ---- cache hit ----
        ParserElement.packrat_cache_stats[HIT] += 1
        trace = self.debug
        hooks = self.debugActions

        if trace and hooks.debug_try:
            try:
                hooks.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass

        if isinstance(cached, Exception):
            if trace and hooks.debug_fail:
                try:
                    hooks.debug_fail(
                        instring, loc, self, cached, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise cached

        cached = cast(tuple[int, ParseResults, int], cached)
        loc_, result, endloc = cached[0], cached[1].copy(), cached[2]
        if trace and hooks.debug_match:
            try:
                hooks.debug_match(
                    instring, loc_, endloc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return loc_, result

1124

1125 _parse = _parseNoCache  # default parse entry point: enable_packrat() rebinds it to _parseCache, disable_memoization() restores this binding

1126

@staticmethod
def reset_cache() -> None:
    """
    Clear the caches used by packrat and left-recursion parsing, and zero
    the packrat hit/miss statistics.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        cls.packrat_cache.clear()
        # zero the counters in place so existing references stay valid
        stats = cls.packrat_cache_stats
        stats[:] = [0] * len(stats)
        cls.recursion_memos.clear()

1138

1139 # class attributes to keep caching status; the two modes are mutually exclusive (see enable_packrat / enable_left_recursion)

1140 _packratEnabled = False

1141 _left_recursion_enabled = False

1142

@staticmethod
def disable_memoization() -> None:
    """
    Disables active Packrat or Left Recursion parsing and their memoization

    This method also works if neither Packrat nor Left Recursion are enabled.
    This makes it safe to call before activating Packrat or Left Recursion
    to clear any previous settings.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        cls.reset_cache()
        cls._left_recursion_enabled = False
        cls._packratEnabled = False
        # go back to the uncached parse entry point
        cls._parse = cls._parseNoCache

1157

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Enables "bounded recursion" parsing, which allows for both direct and indirect
    left-recursion. During parsing, left-recursive :class:`Forward` elements are
    repeatedly matched with a fixed recursion depth that is gradually increased
    until finding the longest match.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    Recursion search naturally memoizes matches of ``Forward`` elements and may
    thus skip reevaluation of parse actions during backtracking. This may break
    programs with parse actions which rely on strict ordering of side-effects.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing works similarly, but not identically, to Packrat
    parsing, thus the two cannot be used together. Use ``force=True`` to disable
    any previous, conflicting settings.

    :raises RuntimeError: if packrat parsing is enabled and ``force`` is false
    :raises NotImplementedError: if ``cache_size_limit`` is zero or negative
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        if force:
            cls.disable_memoization()
        elif cls._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        cls.recursion_memos = memo  # type: ignore[assignment]
        cls._left_recursion_enabled = True

1215

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similarly, but not identically, to Bounded Recursion
    parsing, thus the two cannot be used together. Use ``force=True`` to disable
    any previous, conflicting settings.

    :raises RuntimeError: if left recursion is enabled and ``force`` is false
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        if force:
            cls.disable_memoization()
        elif cls._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        # already active: keep the existing cache as it is
        if cls._packratEnabled:
            return

        cls._packratEnabled = True
        cls.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        cls._parse = cls._parseCache

1267

def parse_string(
    self, instring: str, parse_all: bool = False, **kwargs
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This function is intended as the primary interface to the
    client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This
    is also equivalent to ending the grammar with :class:`StringEnd`\\ ().

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    must_consume_all = parse_all or legacy_parse_all

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if must_consume_all:
            # only ignorables/whitespace may remain after the match
            end_loc = self.preParse(instring, end_loc)
            end_marker = Empty() + StringEnd().set_debug(False)
            end_marker._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)

    return tokens

1343

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    **kwargs,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found.  If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    legacy_limit: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    match_limit = min(legacy_limit, max_matches)

    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    text_len = len(instring)

    if always_skip_whitespace:
        # skip using this expression's ignorables and whitespace characters
        # via a plain Empty, rather than this expression's own preParse
        skipper = Empty()
        skipper.ignoreExprs = self.ignoreExprs
        skipper.whiteChars = self.whiteChars
        skip_leading = skipper.preParse
    else:
        skip_leading = self.preParse
    parse_at = self._parse
    ParserElement.reset_cache()

    loc = 0
    found = 0
    try:
        while loc <= text_len and found < match_limit:
            try:
                start = skip_leading(instring, loc)
                end, tokens = parse_at(instring, start, callPreParse=False)
            except ParseException:
                # no match here: retry one character further on
                loc = start + 1
                continue

            if end <= loc:
                # the match did not get past the scan position: skip it
                loc = start + 1
                continue

            found += 1
            if debug:
                print(
                    {
                        "tokens": tokens.as_list(),
                        "start": start,
                        "end": end,
                    }
                )
            yield tokens, start, end

            if not overlap:
                loc = end
            elif skip_leading(instring, loc) > loc:
                loc = end
            else:
                loc += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1447

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string`, to modify matching text with modified tokens that may
    be returned from a parse action.  To use ``transform_string``, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking ``transform_string()`` on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  ``transform_string()`` returns the resulting transformed string.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
            made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
            Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    consumed_to = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for tokens, start, end in self.scan_string(instring, debug=debug):
            # carry over the unmatched text between the previous match and this one
            if start > consumed_to:
                pieces.append(instring[consumed_to:start])
            consumed_to = end

            if not tokens:
                continue

            if isinstance(tokens, ParseResults):
                pieces.extend(tokens.as_list())
            elif isinstance(tokens, Iterable) and not isinstance(tokens, str_type):
                pieces.extend(tokens)
            else:
                pieces.append(tokens)

        pieces.append(instring[consumed_to:])
        kept = [piece for piece in pieces if piece]
        return "".join(str(item) for item in _flatten(kept))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1506

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    ``max_matches`` argument, to clip searching after 'n' matches are found.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    legacy_limit: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    limit = min(legacy_limit, max_matches)
    try:
        scanner = self.scan_string(
            instring,
            max_matches=limit,
            always_skip_whitespace=False,
            debug=debug,
        )
        # keep only the tokens of each match; locations are dropped
        return ParseResults([tokens for tokens, _start, _end in scanner])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1565

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``include_separators`` argument (default= ``False``), if the separating
    matching text should be included in the split results.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    legacy_flag: bool = deprecate_argument(kwargs, "includeSeparators", False)
    emit_separators = legacy_flag or include_separators

    resume_at = 0
    for tokens, start, end in self.scan_string(instring, max_matches=maxsplit):
        # text between the previous separator and this one
        yield instring[resume_at:start]
        if emit_separators:
            yield tokens[0]
        resume_at = end
    # whatever follows the last separator (possibly the empty string)
    yield instring[resume_at:]

1603

def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement`
    converts them to :class:`Literal`\\ s by default.

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        # the skip target is not known until the next operand is added
        return _PendingSkip(self)

    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return And([self, operand])
    return NotImplemented

1647

def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`

    A leading ``...`` becomes a :class:`SkipTo` this expression (named
    ``"_skipped*"``) followed by this expression; a string is converted with
    ``self._literalStringClass``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand + self
    return NotImplemented

1660

def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop

    A string operand is converted with ``self._literalStringClass``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return self + And._ErrorStop() + operand
    return NotImplemented

1670

def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`

    A string operand is converted with ``self._literalStringClass``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand - self
    return NotImplemented

1680

def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    :raises ValueError: for a negative count, or a tuple whose second
        value is smaller than its first
    """
    # normalize the Ellipsis spellings to tuples
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        required, optional = other, 0
    elif isinstance(other, tuple):
        bounds = tuple(None if bound is Ellipsis else bound for bound in other)
        lower, upper = (bounds + (None, None))[:2]
        if lower is None:
            lower = 0
        if not isinstance(lower, int):
            return NotImplemented

        if upper is None:
            # open-ended repetition
            if lower == 0:
                return ZeroOrMore(self)
            if lower == 1:
                return OneOrMore(self)
            return self * lower + ZeroOrMore(self)

        if not isinstance(upper, int):
            return NotImplemented
        required, optional = lower, upper - lower
    else:
        return NotImplemented

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    if not optional:
        return self if required == 1 else And([self] * required)

    # mandatory leading part (absent when nothing is required)
    if not required:
        head = None
    elif required == 1:
        head = self
    else:
        head = And([self] * required)

    # nested optionals, built from the innermost outwards:
    # Opt(self + Opt(self + ... Opt(self)))
    tail = Opt(self)
    for _ in range(optional - 1):
        tail = Opt(self + tail)

    return tail if head is None else head + tail

1760

1761 def __rmul__(self, other) -> ParserElement:

1762 return self.__mul__(other)

1763

def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        other = self._literalStringClass(other)

    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    return NotImplemented

1782

def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`

    A string operand is converted with ``self._literalStringClass``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand | self
    return NotImplemented

1792

def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`

    A string operand is converted with ``self._literalStringClass``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return Or([self, operand])
    return NotImplemented

1802

def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`

    A string operand is converted with ``self._literalStringClass``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand ^ self
    return NotImplemented

1812

def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`

    A string operand is converted with ``self._literalStringClass``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return Each([self, operand])
    return NotImplemented

1822

def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`

    A string operand is converted with ``self._literalStringClass``.
    """
    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return operand & self
    return NotImplemented

1832

def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny`
    wrapping this expression.
    """
    negated = NotAny(self)
    return negated

1838

1839 # disable __iter__ to override legacy use of sequential access to __getitem__ to

1840 # iterate over a sequence (__getitem__ below implements repetition, not indexing)

1841 __iter__ = None

1842

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    .. versionchanged:: 3.1.0
       Support for slice notation.

    :raises TypeError: if more than 2 index arguments are given
    """
    has_stop = False
    stop_on = NoMatch()

    # slice notation carries the stop_on expression in the slice's stop
    if isinstance(key, slice):
        stop_on = key.stop
        key = ... if key.start is None else key.start
        has_stop = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        bounds_and_stop = key[1]
        key, stop_on = (key[0], bounds_and_stop.start), bounds_and_stop.stop
        has_stop = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    count = len(key)
    if count > 2:
        overflow = f"... [{count}]" if count > 5 else ""
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{overflow})"
        )

    # clip to 2 elements
    repeated = typing.cast(_MultipleMatch, self * tuple(key[:2]))

    if has_stop:
        repeated.stopOn(stop_on)

    return repeated

1904

def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then
    ``list_all_matches`` will be passed as ``True``.

    If ``name`` is omitted (or ``None``), same as calling :class:`copy`.

    Example:

    .. testcode::

       # these are equivalent
       userdata = (
           Word(alphas).set_results_name("name")
           + Word(nums + "-").set_results_name("socsecno")
       )

       userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()

    return self._setResultsName(name)

1930

def suppress(self) -> ParserElement:
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, which
    suppresses its output; useful to keep punctuation from cluttering up
    returned output.
    """
    suppressed = Suppress(self)
    return suppressed

1937

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on the skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping
       in child elements (if any); this implementation does not read the flag
       and only updates this element
    :returns: this element, to allow call chaining
    """
    setattr(self, "skipWhitespace", True)
    return self

1947

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off the skipping of whitespace before matching the characters in this
    :class:`ParserElement`'s defined pattern. This is normally only used
    internally by the pyparsing module, but may be needed in some
    whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping
       in child elements (if any); this implementation does not read the flag
       and only updates this element
    :returns: this element, to allow call chaining
    """
    setattr(self, "skipWhitespace", False)
    return self

1958

def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Override the default whitespace characters for this element.

    Whitespace skipping is switched on, ``chars`` is stored as a ``set``, and
    ``copy_defaults`` is recorded in ``copyDefaultWhiteChars``.

    :returns: this element, to allow call chaining
    """
    self.skipWhitespace = True
    self.whiteChars, self.copyDefaultWhiteChars = set(chars), copy_defaults
    return self

1969

def parse_with_tabs(self) -> ParserElement:
    """
    Override the default behavior of expanding ``<TAB>`` s to spaces before
    parsing the input string, by setting this element's ``keepTabs`` flag.
    Must be called before ``parse_string`` when the input grammar contains
    elements that match ``<TAB>`` characters.

    :returns: this element, to allow call chaining
    """
    setattr(self, "keepTabs", True)
    return self

1978

def ignore(self, other: ParserElement) -> ParserElement:
    """
    Register an expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A ``str``/``bytes`` argument is wrapped in :class:`Suppress`. A
    :class:`Suppress` expression is added only if it is not already in the
    ignore list; any other expression is copied, wrapped in :class:`Suppress`,
    and appended.

    Example:

    .. doctest::

       >>> patt = Word(alphas)[...]
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj']

       >>> patt = Word(alphas)[...].ignore(c_style_comment)
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self

2006

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching.
    Any action passed as a false value (such as ``None``) is replaced by the
    corresponding default debug action. Calling this method also turns the
    ``debug`` flag on.

    :param start_action: method to be called when an expression is about to be parsed;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
       should have the signature::

          fn(input_string: str,
             start_location: int,
             end_location: int,
             expression: ParserElement,
             parsed_tokens: ParseResults,
             cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             exception: Exception,
             cache_hit: bool)

    :returns: this element, to allow call chaining
    """
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_failure = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]
    self.debugActions = self.DebugActions(on_start, on_success, on_failure)
    self.debug = True
    return self

2050

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions.

    Example:

    .. testcode::

       wd = Word(alphas).set_name("alphaword")
       integer = Word(nums).set_name("numword")
       term = wd | integer

       # turn on debugging for wd
       wd.set_debug()

       term[1, ...].parse_string("abc 123 xyz 890")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE

       Match alphaword at loc 0(1,1)
         abc 123 xyz 890
         ^
       Matched alphaword -> ['abc']
       Match alphaword at loc 4(1,5)
         abc 123 xyz 890
             ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
       Match alphaword at loc 8(1,9)
         abc 123 xyz 890
                 ^
       Matched alphaword -> ['xyz']
       Match alphaword at loc 12(1,13)
         abc 123 xyz 890
                     ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
       abc 123 xyz 890
                      ^
       Match alphaword failed, ParseException raised: Expected alphaword, found end of text ...

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :meth:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :meth:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # apply the flag individually to every expression yielded by visit_all()
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling debug installs the default actions (which also sets self.debug)
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

2120

@property
def default_name(self) -> str:
    """
    The auto-generated name for this expression.

    The name is built by ``_generateDefaultName()`` the first time it is
    needed and cached in ``_defaultName`` for later accesses.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

2126

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    :returns: the auto-generated name string for this expression; the
       ``default_name`` property calls this when no name has been cached yet
    """

2132

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Define name for this expression, makes debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example:

    .. doctest::

       >>> integer = Word(nums)
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

       >>> integer.set_name("integer")
       integer
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # the error message is rebuilt from str(self) after the name changes
    self.errmsg = "Expected " + str(self)

    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.set_debug(name is not None)

    return self

2167

@property
def name(self) -> str:
    """
    Returns a user-defined name if available, but otherwise defaults back to the auto-generated name
    """
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

@name.setter
def name(self, new_name) -> None:
    # assigning to ``name`` is routed through set_name()
    self.set_name(new_name)

2178

def __str__(self) -> str:
    """Return this expression's ``name``."""
    display_name = self.name
    return display_name

2181

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text

2184

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and clear the cached default name.

    :returns: this element, to allow call chaining
    """
    self._defaultName = None
    self.streamlined = True
    return self

2189

def recurse(self) -> list[ParserElement]:
    """Return this expression's sub-expressions; this implementation has none."""
    no_children: list[ParserElement] = []
    return no_children

2192

def _checkRecursion(self, parseElementList):
    """
    Call ``_checkRecursion`` on every expression returned by ``recurse()``,
    passing each one a copy of ``parseElementList`` extended with this element.
    """
    path = parseElementList[:] + [self]
    for child in self.recurse():
        child._checkRecursion(path)

2197

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Emits a ``DeprecationWarning`` and then runs ``_checkRecursion`` starting
    from an empty list. ``validateTrace`` is accepted but not used.
    """
    message = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

2212

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The legacy ``parseAll`` keyword is still accepted through ``kwargs`` and
    is OR-ed with ``parse_all``.
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    parse_all = parse_all or legacy_parse_all

    try:
        # file-like objects expose read(); anything else is treated as a path
        text = typing.cast(TextIO, file_or_filename).read()
    except AttributeError:
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as source:
            text = source.read()

    try:
        return self.parse_string(text, parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

2243

def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object is always equal
    - a ``str``/``bytes`` value is equal if this expression matches it in full
    - another :class:`ParserElement` is equal if both have equal instance attributes
    - anything else is unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2252

def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity

2255

def matches(self, test_string: str, parse_all: bool = True, **kwargs) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match
    :param parse_all: flag to pass to :meth:`parse_string` when running tests;
       AND-ed with the legacy ``parseAll`` keyword if that is given
    :returns: ``True`` if parsing succeeds, ``False`` if a
       ``ParseBaseException`` is raised

    Example:

    .. doctest::

       >>> expr = Word(nums)
       >>> expr.matches("100")
       True
    """
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", True)
    parse_all = parse_all and legacy_parse_all

    try:
        self.parse_string(str(test_string), parse_all=parse_all)
    except ParseBaseException:
        return False
    return True

2280

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The keyword-only camelCase arguments (``parseAll``, ``fullDump``,
    ``printResults``, ``failureTests``, ``postParse``) are the legacy spellings
    of the corresponding snake_case arguments; each pair is merged before use.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
    test's output

    Passing example:

    .. testcode::

       number_expr = pyparsing_common.number.copy()

       result = number_expr.run_tests('''
           # unsigned integer
           100
           # negative integer
           -100
           # float with scientific notation
           6.02e23
           # integer with scientific notation
           1e-12
           # negative decimal number without leading digit
           -.100
           ''')
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # unsigned integer
       100
       [100]

       # negative integer
       -100
       [-100]

       # float with scientific notation
       6.02e23
       [6.02e+23]

       # integer with scientific notation
       1e-12
       [1e-12]

       # negative decimal number without leading digit
       -.100
       [-0.1]
       Success

    Failure-test example:

    .. testcode::

       result = number_expr.run_tests('''
           # stray character
           100Z
           # too many '.'
           3.14.159
           ''', failure_tests=True)
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # stray character
       100Z
       100Z
          ^
       ParseException: Expected end of text, found 'Z' ...

       # too many '.'
       3.14.159
       3.14.159
           ^
       ParseException: Expected end of text, found '.' ...
       FAIL: Expected end of text, found '.' ...
       Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

       expr = Word(alphanums)[1,...]
       expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE
       :hide:


       this is a test\\n of strings that spans \\n 3 lines
       ['this', 'is', 'a', 'test', 'of', 'strings', 'that', 'spans', '3', 'lines']

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge each legacy camelCase argument with its snake_case counterpart
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse

    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        # use the strip method of the actual type, so bytes input also works
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]

    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)

    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # matches a literal backslash-n in a test line so it can be turned into a real newline
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines (and blank lines following a comment) are collected
        # and shown with the next test
        if (comment_specified and comment.matches(t, False)) or (comments and not t):
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            # use the merged flag so that the legacy parseAll keyword is honored too
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # the frame after the synthesized call line is the parse
                    # action itself; guard against it being absent so that a
                    # StopIteration cannot escape from this handler
                    following = next(it, None)
                    if following is not None:
                        next_f = following[0]
                        tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is None:
                out.append(result.dump(full=fullDump))
            else:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is None:
                        # no custom output: fall back to the standard dump,
                        # honoring full_dump like the other dump calls
                        out.append(result.dump(full=fullDump))
                    elif isinstance(pp_value, ParseResults):
                        out.append(pp_value.dump())
                    else:
                        out.append(str(pp_value))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2522

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: open the target first, then render into it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2584

# Compatibility synonyms
#
# camelCase aliases for the snake_case methods defined on this class. Each alias
# is built by replaced_by_pep8() from the legacy name and the current method;
# the first group is additionally wrapped in staticmethod().
# NOTE(review): replaced_by_pep8 is imported from .util - presumably it wraps the
# target to flag use of the old name; confirm there.
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# instance-method aliases
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# plain alias: the same property object under its legacy name
defaultName = default_name
# fmt: on

2619

2620

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element;
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.must_skip = must_skip
        self.anchor = expr

    def _generateDefaultName(self) -> str:
        # render the anchor followed by an Empty, then show that Empty as '...'
        placeholder = self.anchor + Empty()
        return str(placeholder).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # adding the following expression turns the pending '...' into a SkipTo
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the entry
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor was absent and nothing was skipped: record it as missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a pending skip must be followed by a target expression before parsing
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2660

2661

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # the default name is simply the name of the concrete class
        class_name = type(self).__name__
        return class_name

2672

2673

class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditionally fail at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2687

2688

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example:

    .. doctest::

       >>> Literal('abc').parse_string('abc')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('abcdef')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('ab')
       Traceback (most recent call last):
       ParseException: Expected 'abc', found 'ab' (at char 0), (line: 1, col: 1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", **kwargs):
        # Subclasses get the default behavior
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        match_string = legacy_match or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", **kwargs) -> None:
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")

        super().__init__()
        match_string = legacy_match or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        # self.name is derived from self.match, so it must be set before this
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before doing the full startswith check
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2750

2751

class Empty(Literal):
    """
    An empty token, will always match.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # both arguments are accepted but not used; the literal is always ""
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # match without consuming any input and without producing tokens
        no_tokens = []
        return loc, no_tokens

2767

2768

class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl compares only a single character
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2774

2775

# Use Literal as the class that ParserElement applies to plain strings given as
# operands (e.g. in __rand__, via self._literalStringClass(other)). The
# assignment is made here because Literal is only defined at this point.
ParserElement._literalStringClass = Literal

2777

2778

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> Keyword("start").parse_string("start")
        ParseResults(['start'], {})
        >>> Keyword("start").parse_string("starting")
        Traceback (most recent call last):
        ParseException: Expected Keyword 'start', keyword was immediately
        followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> Keyword("start").parse_string("starting").debug()
        Traceback (most recent call last):
        ParseException: Expected Keyword "start", keyword was immediately
        followed by keyword character, found 'ing' ...

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        **kwargs,
    ) -> None:
        """
        :param match_string: the keyword text to match; must not be empty
        :param ident_chars: characters that count as keyword characters when
            they touch the match; ``None`` selects
            :attr:`Keyword.DEFAULT_KEYWORD_CHARS`
        :param caseless: compare the text and its neighbors case-insensitively
        :param kwargs: legacy ``matchString`` / ``identChars`` keywords, read
            through ``deprecate_argument``; a truthy legacy value takes
            precedence over its snake_case counterpart
        :raises ValueError: if the resulting match string is empty
        """
        matchString = deprecate_argument(kwargs, "matchString", "")
        identChars = deprecate_argument(kwargs, "identChars", None)

        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparisons are done in upper case, so both the match
            # text and the keyword characters are stored upper-cased.
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.ident_chars = set(identChars)

    @property
    def identChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use ident_chars instead.

        Property returning the characters being used as keyword characters for this expression.
        """
        return self.ident_chars

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword text at ``loc``, requiring that the characters
        immediately before and after it are not keyword characters.

        Returns ``(loc + len(keyword), keyword)`` on success; the returned
        text is always the defining keyword string.

        :raises ParseException: if the text does not match (reported at
            ``loc``), or if it matches but touches a keyword character
            (reported at the offending neighbor's position).
        """
        errmsg = self.errmsg or ""
        errloc = loc
        # Read the set attribute directly rather than through the deprecated
        # identChars property.
        ident_chars = self.ident_chars
        match_end = loc + self.matchLen

        if self.caseless:
            if instring[loc:match_end].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in ident_chars:
                    if (
                        loc >= len(instring) - self.matchLen
                        or instring[match_end].upper() not in ident_chars
                    ):
                        return match_end, self.match

                    # followed by keyword char
                    # (message worded identically to the case-sensitive branch)
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = match_end
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in ident_chars:
                if (
                    loc >= len(instring) - self.matchLen
                    or instring[match_end] not in ident_chars
                ):
                    return match_end, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = match_end
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
        # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2920

2921

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example:

    .. doctest::

        >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", **kwargs) -> None:
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        if legacy_match:
            # a truthy legacy ``matchString`` wins over ``match_string``
            match_string = legacy_match

        # The base literal stores the upper-cased text for comparison.
        super().__init__(match_string.upper())
        # Preserve the defining literal.
        self.returnString = match_string
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the upper-cased input slice at ``loc`` with the stored
        upper-cased literal; on success return the literal as originally
        given, otherwise raise :class:`ParseException` at ``loc``.
        """
        stop = loc + self.matchLen
        if instring[loc:stop].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return stop, self.returnString

2951

2952

class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example:

    .. doctest::

        >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self, match_string: str = "", ident_chars: typing.Optional[str] = None, **kwargs
    ) -> None:
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        legacy_ident_chars: typing.Optional[str] = deprecate_argument(
            kwargs, "identChars", None
        )

        # A truthy legacy keyword takes precedence over its snake_case
        # counterpart; matching is always caseless for this class.
        super().__init__(
            legacy_match or match_string,
            legacy_ident_chars or ident_chars,
            caseless=True,
        )

2978

2979

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example:

    .. doctest::
        :options: +NORMALIZE_WHITESPACE

        >>> patt = CloseMatch("ATCATCGAATGGA")
        >>> patt.parse_string("ATCATCGAAXGGA")
        ParseResults(['ATCATCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

        >>> patt.parse_string("ATCAXCGAAXGGA")
        Traceback (most recent call last):
        ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
        found 'ATCAXCGAAXGGA' (at char 0), (line:1, col:1)

        # exact match
        >>> patt.parse_string("ATCATCGAATGGA")
        ParseResults(['ATCATCGAATGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': []})

        # close match allowing up to 2 mismatches
        >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        >>> patt.parse_string("ATCAXCGAAXGGA")
        ParseResults(['ATCAXCGAAXGGA'],
        {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        caseless=False,
        **kwargs,
    ) -> None:
        # The legacy ``maxMismatches`` keyword also supplies the default of 1
        # that applies when ``max_mismatches`` is left as None.
        legacy_limit: int = deprecate_argument(kwargs, "maxMismatches", 1)
        if max_mismatches is not None:
            limit = max_mismatches
        else:
            limit = legacy_limit

        super().__init__()
        self.match_string = match_string
        self.maxMismatches = limit
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the input at ``loc`` with ``match_string`` character by
        character, recording the index of each mismatch.

        Returns ``(next_loc, results)`` where ``results`` holds the matched
        input text plus the ``original`` and ``mismatches`` named results.

        :raises ParseException: at ``loc`` if the remaining input is shorter
            than ``match_string`` or more than ``maxMismatches`` characters
            differ.
        """
        target = self.match_string
        begin = loc
        end = begin + len(target)

        # Not enough input left to hold the whole match string.
        if end > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        limit = self.maxMismatches
        fold_case = self.caseless
        bad_positions = []
        last_index = 0
        for last_index, (src, mat) in enumerate(zip(instring[begin:end], target)):
            if fold_case:
                src, mat = src.lower(), mat.lower()
            if src == mat:
                continue
            bad_positions.append(last_index)
            if len(bad_positions) > limit:
                # too many differences - give up without scanning the rest
                raise ParseException(instring, loc, self.errmsg, self)

        loc = begin + last_index + 1
        results = ParseResults([instring[begin:loc]])
        results["original"] = target
        results["mismatches"] = bad_positions
        return loc, results

3080

3081

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

        # a word composed of digits
        integer = Word(nums)
        # Two equivalent alternate forms:
        Word("0123456789")
        Word(srange("[0-9]"))

        # a word with a leading capital, and zero or more lowercase
        capitalized_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral
        # (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
        Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        """
        Build the character sets and length limits for this Word and, when
        possible, a compiled regular expression used for matching.

        Legacy ``initChars`` / ``bodyChars`` / ``asKeyword`` /
        ``excludeChars`` keywords are read through ``deprecate_argument``;
        a truthy legacy value takes precedence over its snake_case
        counterpart.

        :raises ValueError: if the initial characters are empty, if
            ``min < 1``, or if ``max`` is given and ``min > max``.
        """
        initChars: typing.Optional[str] = deprecate_argument(kwargs, "initChars", None)
        bodyChars: typing.Optional[str] = deprecate_argument(kwargs, "bodyChars", None)
        asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        excludeChars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.init_chars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body characters: body shares the initial set
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # ``exact`` overrides both limits (maxSpecified is left as
            # computed from the ``max`` argument above)
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # Use the stored sets directly; the deprecated ``initChars`` property
        # builds a new set copy on every access.
        init_set = self.init_chars
        body_set = self.bodyChars

        # see if we can make a regex for this Word
        if " " not in (init_set | body_set):
            if len(init_set) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(init_set)}]"

            if body_set == init_set:
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(body_set)}]"
                    # the leading fragment already accounts for one
                    # character, so body repeat counts are reduced by one
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # fall back to the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    @property
    def initChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
            use `init_chars` instead.

        Property returning the initial chars to be used when matching this
        Word expression. If no body chars were specified, the initial characters
        will also be the body characters.
        """
        return set(self.init_chars)

    def copy(self) -> Word:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            # rebind the regex fast path to the copy rather than the original
            ret.re_match = self.re_match
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        def charsAsStr(s):
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        init_set = self.init_chars
        if init_set != self.bodyChars:
            base = f"W:({charsAsStr(init_set)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(init_set)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single-character word: drop the leading "W:"
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Character-by-character matcher, used when no regex fast path was
        installed by ``__init__``.

        Returns ``(next_loc, matched_text)``.

        :raises ParseException: if the first character is not an initial
            character, the match is shorter than ``minLen``, an explicit
            ``max`` was given and another body character follows, or
            ``asKeyword`` is set and a body character adjoins the match.
        """
        # membership test on the stored set; the deprecated ``initChars``
        # property would copy the set on every parse attempt
        if instring[loc] not in self.init_chars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Regex-based matcher installed over ``parseImpl`` when ``__init__``
        compiles a pattern successfully.

        Returns ``(match_end, matched_text)``.

        :raises ParseException: at ``loc`` if the compiled pattern does not
            match there.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

3370

3371

class Char(Word):
    """A short-cut class for defining :class:`Word` ``(characters, exact=1)``,
    when defining a match of any single character in a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        legacy_as_keyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        legacy_exclude_chars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        # A truthy legacy keyword takes precedence over its snake_case
        # counterpart; the word length is fixed at exactly one character.
        super().__init__(
            charset,
            exact=1,
            as_keyword=legacy_as_keyword or as_keyword,
            exclude_chars=legacy_exclude_chars or exclude_chars,
        )

3395

3396

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ for an
    explanation of the acceptable patterns and flags.

    Example:

    .. testcode::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept regular expressions compiled using the
        # re module
        import re
        parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        **kwargs,
    ) -> None:
        """
        :param pattern: a non-empty pattern string, an object with
            ``pattern`` and ``match`` attributes (a compiled RE), or a
            zero-argument callable returning either of those; a callable is
            not invoked until the compiled expression is first needed
        :param flags: flags passed to ``re.compile()`` when a string
            pattern is compiled
        :param as_group_list: return ``match.groups()`` as the parse result
        :param as_match: return the match object itself as the parse
            result; takes effect over ``as_group_list`` if both are set
        :param kwargs: legacy ``asGroupList`` / ``asMatch`` keywords, read
            through ``deprecate_argument``
        :raises ValueError: if ``pattern`` is an empty string
        :raises TypeError: if ``pattern`` is none of the accepted kinds
        """
        super().__init__()
        asGroupList: bool = deprecate_argument(kwargs, "asGroupList", False)
        asMatch: bool = deprecate_argument(kwargs, "asMatch", False)

        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self) -> Regex:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Regex = cast(Regex, super().copy())
        # rebind the selected parseImpl variant to the copy
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch  # type: ignore [method-assign]
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """
        Property returning the compiled regular expression for this Regex.

        Generally only used internally by pyparsing.

        Resolves a deferred (callable) pattern, compiles a string pattern,
        and makes sure the may-return-empty flag has been derived, including
        for patterns that were supplied already compiled.

        :raises ValueError: if a string pattern fails to compile.
        """
        if not self._re:
            if callable(self.pattern):
                # replace self.pattern with the value returned by calling self.pattern()
                self.pattern = cast(Callable[[], str], self.pattern)()

            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                # we got a compiled RE back instead of a str
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
            else:
                self.reString = self.pattern
                try:
                    self._re = re.compile(self.pattern, self.flags)
                except re.error as err:
                    raise ValueError(
                        f"invalid pattern ({self.pattern!r}) passed to Regex"
                    ) from err
                # Use self._re here, not self.re: this getter's result has
                # not been cached yet, so self.re would re-enter it.
                self._may_return_empty = self._re.match("") is not None

        if self._may_return_empty is None:
            # Precompiled patterns never go through the compile step above;
            # derive the flag here so mayReturnEmpty does not report None.
            # pattern[:0] is an empty value of the pattern's own type.
            empty = self._re.pattern[:0]
            self._may_return_empty = self._re.match(empty) is not None

        return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        return self.re.match

    @property
    def mayReturnEmpty(self):
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            self.re  # noqa
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the expression at ``loc`` and return ``(match_end, results)``,
        where ``results`` holds the matched text plus one named result per
        named group of the expression.

        :raises ParseException: at ``loc`` if the expression does not match.
        """
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """
        Variant of :meth:`parseImpl` that returns ``match.groups()`` as the
        result; installed when the expression is built with
        ``as_group_list=True``.
        """
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """
        Variant of :meth:`parseImpl` that returns the match object itself as
        the result; installed when the expression is built with
        ``as_match=True``.
        """
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example:

        .. testcode::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))

        .. testoutput::

            <h1>main title</h1>

        :raises TypeError: if this expression was built with
            ``as_group_list=True``, or with ``as_match=True`` and ``repl``
            is a callable.
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # tokens[0] is the match object produced by parseImplAsMatch
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3620

3621

3622class QuotedString(Token):

3623 r"""

3624 Token for matching strings that are delimited by quoting characters.

3625

3626 Defined with the following parameters:

3627

3628 - ``quote_char`` - string of one or more characters defining the

3629 quote delimiting string

3630 - ``esc_char`` - character to re_escape quotes, typically backslash

3631 (default= ``None``)

3632 - ``esc_quote`` - special quote sequence to re_escape an embedded quote

3633 string (such as SQL's ``""`` to re_escape an embedded ``"``)

3634 (default= ``None``)

3635 - ``multiline`` - boolean indicating whether quotes can span

3636 multiple lines (default= ``False``)

3637 - ``unquote_results`` - boolean indicating whether the matched text

3638 should be unquoted (default= ``True``)

3639 - ``end_quote_char`` - string of one or more characters defining the

3640 end of the quote delimited string (default= ``None`` => same as

3641 quote_char)

3642 - ``convert_whitespace_escapes`` - convert escaped whitespace

3643 (``'\t'``, ``'\n'``, etc.) to actual whitespace

3644 (default= ``True``)

3645

3646 .. caution:: ``convert_whitespace_escapes`` has no effect if

3647 ``unquote_results`` is ``False``.

3648

3649 Example:

3650

3651 .. doctest::

3652

3653 >>> qs = QuotedString('"')

3654 >>> print(qs.search_string('lsjdf "This is the quote" sldjf'))

3655 [['This is the quote']]

3656 >>> complex_qs = QuotedString('{{', end_quote_char='}}')

3657 >>> print(complex_qs.search_string(

3658 ... 'lsjdf {{This is the "quote"}} sldjf'))

3659 [['This is the "quote"']]

3660 >>> sql_qs = QuotedString('"', esc_quote='""')

3661 >>> print(sql_qs.search_string(

3662 ... 'lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

3663 [['This is the quote with "embedded" quotes']]

3664 """

3665

3666 ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

3667

3668 def __init__(

3669 self,

3670 quote_char: str = "",

3671 esc_char: typing.Optional[str] = None,

3672 esc_quote: typing.Optional[str] = None,

3673 multiline: bool = False,

3674 unquote_results: bool = True,

3675 end_quote_char: typing.Optional[str] = None,

3676 convert_whitespace_escapes: bool = True,

3677 **kwargs,

3678 ) -> None:

3679 super().__init__()

3680 quoteChar: str = deprecate_argument(kwargs, "quoteChar", "")

3681 escChar: str = deprecate_argument(kwargs, "escChar", None)

3682 escQuote: str = deprecate_argument(kwargs, "escQuote", None)

3683 unquoteResults: bool = deprecate_argument(kwargs, "unquoteResults", True)

3684 endQuoteChar: typing.Optional[str] = deprecate_argument(

3685 kwargs, "endQuoteChar", None

3686 )

3687 convertWhitespaceEscapes: bool = deprecate_argument(

3688 kwargs, "convertWhitespaceEscapes", True

3689 )

3690

3691 esc_char = escChar or esc_char

3692 esc_quote = escQuote or esc_quote

3693 unquote_results = unquoteResults and unquote_results

3694 end_quote_char = endQuoteChar or end_quote_char

3695 convert_whitespace_escapes = (

3696 convertWhitespaceEscapes and convert_whitespace_escapes

3697 )

3698 quote_char = quoteChar or quote_char

3699

3700 # remove white space from quote chars

3701 quote_char = quote_char.strip()

3702 if not quote_char:

3703 raise ValueError("quote_char cannot be the empty string")

3704

3705 if end_quote_char is None:

3706 end_quote_char = quote_char

3707 else:

3708 end_quote_char = end_quote_char.strip()

3709 if not end_quote_char:

3710 raise ValueError("end_quote_char cannot be the empty string")

3711

3712 self.quote_char: str = quote_char

3713 self.quote_char_len: int = len(quote_char)

3714 self.first_quote_char: str = quote_char[0]

3715 self.end_quote_char: str = end_quote_char

3716 self.end_quote_char_len: int = len(end_quote_char)

3717 self.esc_char: str = esc_char or ""

3718 self.has_esc_char: bool = esc_char is not None

3719 self.esc_quote: str = esc_quote or ""

3720 self.unquote_results: bool = unquote_results

3721 self.convert_whitespace_escapes: bool = convert_whitespace_escapes

3722 self.multiline = multiline

3723 self.re_flags = re.RegexFlag(0)

3724

3725 # fmt: off

3726 # build up re pattern for the content between the quote delimiters

3727 inner_pattern: list[str] = []

3728

3729 if esc_quote:

3730 inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

3731

3732 if esc_char:

3733 inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

3734

3735 if len(self.end_quote_char) > 1:

3736 inner_pattern.append(

3737 "(?:"

3738 + "|".join(

3739 f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"

3740 for i in range(len(self.end_quote_char) - 1, 0, -1)

3741 )

3742 + ")"

3743 )

3744

3745 if self.multiline:

3746 self.re_flags |= re.MULTILINE | re.DOTALL

3747 inner_pattern.append(

3748 rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"

3749 rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"

3750 )

3751 else:

3752 inner_pattern.append(

3753 rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"

3754 rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"

3755 )

3756

3757 self.pattern = "".join(

3758 [

3759 re.escape(self.quote_char),

3760 "(?:",

3761 '|'.join(inner_pattern),

3762 ")*",

3763 re.escape(self.end_quote_char),

3764 ]

3765 )

3766

3767 if self.unquote_results:

3768 if self.convert_whitespace_escapes:

3769 self.unquote_scan_re = re.compile(

3770 rf"({'|'.join(re.escape(k) for k in self.ws_map)})"

3771 rf"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"

3772 rf"|({re.escape(self.esc_char)}.)"

3773 rf"|(\n|.)",

3774 flags=self.re_flags,

3775 )

3776 else:

3777 self.unquote_scan_re = re.compile(

3778 rf"({re.escape(self.esc_char)}.)"

3779 rf"|(\n|.)",

3780 flags=self.re_flags

3781 )

3782 # fmt: on

3783

3784 try:

3785 self.re = re.compile(self.pattern, self.re_flags)

3786 self.reString = self.pattern

3787 self.re_match = self.re.match

3788 except re.error:

3789 raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

3790

3791 self.errmsg = f"Expected {self.name}"

3792 self.mayIndexError = False

3793 self._may_return_empty = True

3794

def _generateDefaultName(self) -> str:
    """Return the default display name, built from the quote delimiters."""
    opener = self.quote_char
    closer = self.end_quote_char
    # identical string delimiters get the short "enclosed in" wording
    if opener == closer and isinstance(opener, str_type):
        return f"string enclosed in {opener!r}"
    return f"quoted string, starting with {opener} ending with {closer}"

3802

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Match a quoted string beginning at ``loc``.

    Returns ``(end_loc, text)`` where ``text`` is the matched string; when
    ``self.unquote_results`` is set the delimiters are removed and escape
    sequences are converted.

    Raises :class:`ParseException` when the text at ``loc`` does not match.
    """
    # compare the first opening-quote character before paying for the
    # full regex match
    found = None
    if instring[loc] == self.first_quote_char:
        found = self.re_match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    end_loc = found.end()
    text = found.group()

    if not self.unquote_results:
        return end_loc, text

    # strip off the opening and closing delimiters
    text = text[self.quote_char_len : -self.end_quote_char_len]
    if not isinstance(text, str_type):
        return end_loc, text

    def decode_numeric(code: str) -> str:
        # ``code`` is the escape sequence with its leading backslash removed
        if code == "0":
            return "\0"
        if code.isdigit() and len(code) == 3:
            return chr(int(code, base=8))
        if code.startswith(("u", "x")):
            return chr(int(code[1:], base=16))
        return code

    pieces = []
    if self.convert_whitespace_escapes:
        # only one of the four groups is non-empty for any given match
        for m in self.unquote_scan_re.finditer(text):
            ws_escape, numeric_escape, escaped_char, plain_char = m.group(1, 2, 3, 4)
            if ws_escape:
                # \t, \n, etc.
                pieces.append(self.ws_map[ws_escape])
            elif numeric_escape:
                # escaped octal, null, hex, and Unicode sequences
                pieces.append(decode_numeric(numeric_escape[1:]))
            elif escaped_char:
                # escape character followed by any character: keep the character
                pieces.append(escaped_char[-1])
            else:
                pieces.append(plain_char)
    else:
        for m in self.unquote_scan_re.finditer(text):
            escaped_char, plain_char = m.group(1, 2)
            pieces.append(escaped_char[-1] if escaped_char else plain_char)
    text = "".join(pieces)

    # replace escaped quotes
    if self.esc_quote:
        text = text.replace(self.esc_quote, self.end_quote_char)

    return end_loc, text

3865

3866

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.

    Whitespace is part of the match unless it is listed in the exclusion
    set (see example). Construct with a string of the disallowed
    characters, plus optional ``min``, ``max`` and/or ``exact`` lengths.
    ``min`` defaults to 1 (values < 1 are rejected); ``max`` and ``exact``
    default to 0, meaning no maximum or exact length restriction.

    Example:

    .. testcode::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(
            DelimitedList(csv_value).parse_string(
                "dkls,lsdkjf,s12 34,@!#,213"
            )
        )

    prints:

    .. testoutput::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self, not_chars: str = "", min: int = 1, max: int = 0, exact: int = 0, **kwargs
    ) -> None:
        super().__init__()
        # pre-PEP8 spelling of ``not_chars``, still accepted as a keyword
        legacy_not_chars: str = deprecate_argument(kwargs, "notChars", "")

        self.skipWhitespace = False
        self.notChars = not_chars or legacy_not_chars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        # ``exact`` overrides both bounds when given
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the length test uses the range-collapsed form, the displayed text
        # uses the raw excluded characters
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return f"!W:({self.notChars})"
        return f"!W:({self.notChars[: 16 - 3]}...)"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        stop = start + 1
        # extend the match until an excluded character or the length limit
        while stop < limit and instring[stop] not in excluded:
            stop += 1

        if stop - start < self.minLen:
            raise ParseException(instring, stop, self.errmsg, self)

        return stop, instring[start:stop]

3949

3950

class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display labels used when building the default name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00a0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180e": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200a": "<HAIR_SPACE>",
        "\u200b": "<ZERO_WIDTH_SPACE>",
        "\u202f": "<NNBSP>",
        "\u205f": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws
        # skip only the known whitespace characters that are NOT being matched
        self.set_whitespace_chars(
            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
            copy_defaults=True,
        )
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # ``exact`` overrides both bounds when given
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        # Characters without an entry in whiteStrs (e.g. "\v") used to raise
        # KeyError here - and therefore in __init__, which reads self.name.
        # Fall back to a generic code point label for them.
        labels = White.whiteStrs
        return "".join(
            labels.get(c, f"<U+{ord(c):04X}>") for c in self.matchWhite
        )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = min(start + self.maxLen, len(instring))
        # consume matching whitespace up to the length limit
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

4028

4029

class PositionToken(Token):
    """Token base class whose instances may return empty and are flagged as
    not raising ``IndexError``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

4035

4036

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; handy when
    scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        target = self.col
        if col(loc, instring) != target:
            if self.ignoreExprs:
                loc = self._skipIgnorables(instring, loc)
            end = len(instring)
            # step over whitespace until the target column is reached
            while loc < end:
                if not instring[loc].isspace() or col(loc, instring) == target:
                    break
                loc += 1
        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        # return the text between the current location and the target column
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]

4069

4070

class LineStart(PositionToken):
    r"""Matches when the current position is the logical start of a line
    (after skipping whitespace) within the parse string.

    Example:

    .. testcode::

        test = '''\
        AAA this line
        AAA and this line
          AAA and even this line
          B AAA but definitely not this line
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['AAA', ' this line']
        ['AAA', ' and this line']
        ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before newline is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used to skip whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        # when newline was originally whitespace, step over newlines as well
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4123

4124

class LineEnd(PositionToken):
    """Matches when the current position is at the end of a line within
    the parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        # newline must not be skipped as whitespace, since it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)
        if loc == end:
            # end of input counts as end of line, with no token
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

4146

4147

class StringStart(PositionToken):
    """Matches when the current position is at the beginning of the parse
    string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # a non-zero location still qualifies when everything before it is
        # just whitespace and ignorables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

4163

4164

class StringEnd(PositionToken):
    """
    Matches when the current position is at the end of the parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        # exactly at the end: advance one past it; already past: stay put
        next_loc = loc + 1 if loc == end else loc
        return next_loc, []

4183

4184

class WordStart(PositionToken):
    """Matches when the current position is at the beginning of a
    :class:`Word`, i.e. it is not preceded by any character in the given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # pre-PEP8 spelling of ``word_chars``; it wins when explicitly given
        legacy_chars: str = deprecate_argument(kwargs, "wordChars", printables)
        chosen = legacy_chars if legacy_chars != printables else word_chars

        super().__init__()
        self.wordChars = set(chosen)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        chars = self.wordChars
        # position 0 always qualifies; elsewhere require a non-word character
        # before and a word character at the current position
        if loc == 0 or (instring[loc - 1] not in chars and instring[loc] in chars):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

4211

4212

class WordEnd(PositionToken):
    """Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # pre-PEP8 spelling of ``word_chars``; it wins when explicitly given
        wordChars: str = deprecate_argument(kwargs, "wordChars", printables)

        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # At loc == 0 there is no preceding character. The original
            # ``instring[loc - 1]`` wrapped around to the LAST character of
            # the input, so the outcome depended on how the string ended.
            # Treat position 0 of a non-empty string as "no word before".
            if (
                loc == 0
                or instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4240

4241

class Tag(Token):
    """
    A meta-element that inserts a named result into the parsed tokens,
    so that it can be checked later in a parse action or while
    processing the parsed results. Takes an optional tag value,
    which defaults to `True`.

    Example:

    .. doctest::

        >>> end_punc = "." | ("!" + Tag("enthusiastic"))
        >>> greeting = "Hello," + Word(alphas) + end_punc

        >>> result = greeting.parse_string("Hello, World.")
        >>> print(result.dump())
        ['Hello,', 'World', '.']

        >>> result = greeting.parse_string("Hello, World!")
        >>> print(result.dump())
        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True
        self.leave_whitespace()
        self.tag_name = tag_name
        self.tag_value = value
        # the tag itself is written into the results by a parse action
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        # parse action: store the tag value under the tag name
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}:{self.tag_name}={self.tag_value!r}"

4283

4284

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement that holds a list of contained
    expressions, for combining and post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(savelist)
        self.exprs: list[ParserElement]

        # generators are materialized first so they can be inspected
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any strings in the sequence with the literal class
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in list(exprs)
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]

        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """
        Add an expression to this ParseExpression's list of expressions.
        """
        self.exprs.append(other)
        # the cached default name no longer reflects the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies of the contained expressions
            copies = [sub.copy() for sub in self.exprs]
            self.exprs = copies
            for sub in copies:
                sub.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)

        if recursive:
            # work on copies of the contained expressions
            copies = [sub.copy() for sub in self.exprs]
            self.exprs = copies
            for sub in copies:
                sub.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        # a Suppress that is already registered is not added a second time
        already_registered = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_registered:
            super().ignore(other)
            for sub in self.exprs:
                sub.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()

        for sub in self.exprs:
            sub.streamline()

        def can_merge(candidate) -> bool:
            # only plain nested expressions of our own class may be merged:
            # no parse actions, no results name, no debugging
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if can_merge(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if can_merge(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self._may_return_empty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        prior = [] if validateTrace is None else validateTrace
        trace = prior[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression, with each contained expression
        copied as well.

        Generally only used internally by pyparsing.
        """
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [sub.copy() for sub in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            # warn once, for the first contained expression that already
            # carries a results name and has not suppressed this warning
            offender = next(
                (
                    sub
                    for sub in self.exprs
                    if isinstance(sub, ParserElement)
                    and sub.resultsName
                    and diag not in sub.suppress_warnings_
                ),
                None,
            )
            if offender is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {offender.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4469

4470

class And(ParseExpression):
    """
    Requires every given :class:`ParserElement` to be found, in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example:

    .. testcode::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # marker element: once parseImpl passes it, failures of later
        # elements are raised as ParseSyntaxException
        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(item) if isinstance(item, str) else item
            for item in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            # each ... becomes a SkipTo of the expression that follows it
            exprs[:-1] = [
                SkipTo(following)("_skipped*") if current is Ellipsis else current
                for current, following in zip(exprs, exprs[1:])
            ]

        super().__init__(exprs, savelist)

        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            first = self.exprs[0]
            if isinstance(first, White):
                self.skipWhitespace = False
            else:
                # take whitespace handling from the leading expression
                self.set_whitespace_chars(
                    first.whiteChars,
                    copy_defaults=first.copyDefaultWhiteChars,
                )
                self.skipWhitespace = first.skipWhitespace

        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """
        Collapse `And` expressions like `And(And(And(A, B), C), D)`
        to `And(A, B, C, D)`.

        .. doctest::

            >>> expr = Word("A") + Word("B") + Word("C") + Word("D")
            >>> # Using '+' operator creates nested And expression
            >>> expr
            {{{W:(A) W:(B)} W:(C)} W:(D)}
            >>> # streamline simplifies to a single And with multiple expressions
            >>> expr.streamline()
            {W:(A) W:(B) W:(C) W:(D)}

        Guards against collapsing out expressions that have special features,
        such as results names or parse actions.

        Resolves pending Skip commands defined using `...` terms.
        """

        def ends_with_pending_skip(expr) -> bool:
            return bool(
                isinstance(expr, ParseExpression)
                and expr.exprs
                and isinstance(expr.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            # iterate over a snapshot, since self.exprs is modified in the loop
            for idx, candidate in enumerate(self.exprs[:-1]):
                if candidate is not deleted_expr_marker and ends_with_pending_skip(
                    candidate
                ):
                    # merge the following expression into the pending skip,
                    # and mark its old slot for removal
                    candidate.exprs[-1] = candidate.exprs[-1] + self.exprs[idx + 1]
                    self.exprs[idx + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        for prev, following in zip(self.exprs, self.exprs[1:]):
            # traverse the expression or any first embedded expr of it looking
            # for an IndentedBlock (but watch out for recursive grammar)
            node = following
            visited = set()
            while id(node) not in visited:
                visited.add(id(node))
                if isinstance(node, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=node: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                first_sub = next(iter(node.recurse()), None)
                if first_sub is None:
                    break
                node = typing.cast(ParserElement, first_sub)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, collected = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        past_error_stop = False
        for expr in self.exprs[1:]:
            # exact type test, not isinstance
            if type(expr) is And._ErrorStop:
                past_error_stop = True
                continue

            if not past_error_stop:
                loc, tokens = expr._parse(instring, loc, do_actions)
            else:
                # after an error stop, failures are raised as ParseSyntaxException
                try:
                    loc, tokens = expr._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            collected += tokens
        return loc, collected

    def __iadd__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr._checkRecursion(trail)
            # stop at the first expression that cannot return empty
            if not expr.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return f"{{{inner}}}"

4658

4659

4660class Or(ParseExpression):

4661 """Requires that at least one :class:`ParserElement` is found. If

4662 two expressions match, the expression that matches the longest

4663 string will be used. May be constructed using the ``'^'``

4664 operator.

4665

4666 Example:

4667

4668 .. testcode::

4669

4670 # construct Or using '^' operator

4671

4672 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))

4673 print(number.search_string("123 3.1416 789"))

4674

4675 prints:

4676

4677 .. testoutput::

4678

4679 [['123'], ['3.1416'], ['789']]

4680 """

4681

def __init__(
    self, exprs: typing.Iterable[ParserElement], savelist: bool = False
) -> None:
    """Store the alternatives and derive the empty-match and whitespace flags from them."""
    super().__init__(exprs, savelist)
    alternatives = self.exprs
    if not alternatives:
        self._may_return_empty = True
        return
    # may return empty if any alternative can; skips whitespace only if all do
    self._may_return_empty = any(alt.mayReturnEmpty for alt in alternatives)
    self.skipWhitespace = all(alt.skipWhitespace for alt in alternatives)

4691

def streamline(self) -> ParserElement:
    """Streamline via the base class, then recompute the flags derived from the alternatives."""
    super().streamline()
    if not self.exprs:
        self.saveAsList = False
        return self
    self._may_return_empty = any(alt.mayReturnEmpty for alt in self.exprs)
    self.saveAsList = any(alt.saveAsList for alt in self.exprs)
    # a White alternative turns off whitespace skipping for the whole expression
    self.skipWhitespace = all(
        alt.skipWhitespace and not isinstance(alt, White) for alt in self.exprs
    )
    return self

4703

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Try every alternative at ``loc`` and return the longest match.

    First pass: each alternative is probed with ``try_parse`` and the end
    location of every successful probe is recorded.  Second pass: the
    recorded candidates are re-parsed with ``_parse``, longest first, and
    the first one that still reaches at least its probed end location wins.

    Raises the selected ``ParseFatalException`` if any alternative raised
    one and nothing matched, otherwise the ``ParseException`` with the
    greatest ``loc``, or a "no defined alternatives to match" error when
    there was nothing to report.
    """
    # furthest-failing non-fatal error seen so far, and its location
    maxExcLoc = -1
    maxException = None
    # (end location, alternative) for every alternative that probed successfully
    matches: list[tuple[int, ParserElement]] = []
    fatals: list[ParseFatalException] = []
    # pre-parse once up front only when every alternative asks for it
    if all(e.callPreparse for e in self.exprs):
        loc = self.preParse(instring, loc)
    for e in self.exprs:
        try:
            loc2 = e.try_parse(instring, loc, raise_fatal=True)
        except ParseFatalException as pfe:
            pfe.__traceback__ = None
            pfe.parser_element = e
            fatals.append(pfe)
            # a fatal error discards any non-fatal error recorded so far
            maxException = None
            maxExcLoc = -1
        except ParseException as err:
            # non-fatal errors are only tracked while no fatal has been seen
            if not fatals:
                err.__traceback__ = None
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
        except IndexError:
            # report an IndexError as a failure located at end of input
            if len(instring) > maxExcLoc:
                maxException = ParseException(
                    instring, len(instring), e.errmsg, self
                )
                maxExcLoc = len(instring)
        else:
            # save match among all matches, to retry longest to shortest
            matches.append((loc2, e))

    if matches:
        # re-evaluate all matches in descending order of length of match, in case attached actions
        # might change whether or how much they match of the input.
        matches.sort(key=itemgetter(0), reverse=True)

        if not do_actions:
            # no further conditions or parse actions to change the selection of
            # alternative, so the first match will be the best match
            best_expr = matches[0][1]
            return best_expr._parse(instring, loc, do_actions)

        # best (end location, tokens) among re-parses that fell short of their probe
        longest: tuple[int, typing.Optional[ParseResults]] = -1, None
        for loc1, expr1 in matches:
            if loc1 <= longest[0]:
                # already have a longer match than this one will deliver, we are done
                return longest

            try:
                loc2, toks = expr1._parse(instring, loc, do_actions)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            else:
                # re-parse reached at least as far as the probe: accept immediately
                if loc2 >= loc1:
                    return loc2, toks
                # didn't match as much as before
                elif loc2 > longest[0]:
                    longest = loc2, toks

        if longest != (-1, None):
            return longest

    if fatals:
        if len(fatals) > 1:
            # furthest fatal first; a tie between the top two is broken by
            # the longer str() of the failing element
            fatals.sort(key=lambda e: -e.loc)
            if fatals[0].loc == fatals[1].loc:
                fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
        max_fatal = fatals[0]
        raise max_fatal

    if maxException is not None:
        # infer from this check that all alternatives failed at the current position
        # so emit this collective error message instead of any single error message
        parse_start_loc = self.preParse(instring, loc)
        if maxExcLoc == parse_start_loc:
            maxException.msg = self.errmsg or ""
        raise maxException

    raise ParseException(instring, loc, "no defined alternatives to match", self)

4787

def __ixor__(self, other):
    """Implement ``^=``: append ``other`` as one more alternative.

    A string operand is first wrapped with ``self._literalStringClass``.
    Returns ``NotImplemented`` when the operand is not a ``ParserElement``.
    """
    candidate = other
    if isinstance(candidate, str_type):
        candidate = self._literalStringClass(candidate)
    if isinstance(candidate, ParserElement):
        return self.append(candidate)  # Or([self, other])
    return NotImplemented

4794

4795 def _generateDefaultName(self) -> str:

4796 return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

4797

def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """Attach a results name, warning first if an ``And`` alternative is present.

    The warning is emitted only when the
    ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled and
    is suppressed neither on this expression nor on the ``And`` alternative.
    """
    if __diag__.warn_multiple_tokens_in_named_alternation:
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        if diag not in self.suppress_warnings_:
            has_unsuppressed_and = any(
                isinstance(alt, And) and diag not in alt.suppress_warnings_
                for alt in self.exprs
            )
            if has_unsuppressed_and:
                warnings.warn(
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group",
                    stacklevel=3,
                )

    return super()._setResultsName(name, list_all_matches)

4820

4821

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

        # watch the order of expressions to match
        >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        >>> print(number.search_string("123 3.1416 789")) # Fail!
        [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        >>> print(number.search_string("123 3.1416 789")) # Better
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Store the alternatives and derive the combined matching flags."""
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return

        self._may_return_empty = any(alt.mayReturnEmpty for alt in self.exprs)
        self.skipWhitespace = all(alt.skipWhitespace for alt in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the alternatives."""
        if self.streamlined:
            return self

        super().streamline()
        alts = self.exprs
        # any() over no alternatives is False, the same value the empty case uses
        self.saveAsList = any(alt.saveAsList for alt in alts)
        if alts:
            self._may_return_empty = any(alt.mayReturnEmpty for alt in alts)
            self.skipWhitespace = all(
                alt.skipWhitespace and not isinstance(alt, White) for alt in alts
            )
        else:
            self._may_return_empty = True
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` from any alternative is re-raised at once.
        If every alternative fails, the failure with the greatest ``loc`` is
        raised; an ``IndexError`` is treated as a failure at end of input.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as fatal:
                fatal.__traceback__ = None
                fatal.parser_element = alt
                raise
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc, furthest_loc = err, err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # when the furthest failure is right at the pre-parsed start position,
        # every alternative failed there, so report this expression's own
        # message rather than one alternative's
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Implement ``|=``: append ``other`` as one more alternative."""
        candidate = other
        if isinstance(candidate, str_type):
            candidate = self._literalStringClass(candidate)
        if isinstance(candidate, ParserElement):
            return self.append(candidate)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        """Return the alternatives' string forms joined by ``' | '`` inside braces."""
        joined = " | ".join(map(str, self.exprs))
        return "{" + joined + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Attach a results name, warning first if an ``And`` alternative is present."""
        if __diag__.warn_multiple_tokens_in_named_alternation:
            diag = Diagnostics.warn_multiple_tokens_in_named_alternation
            if diag not in self.suppress_warnings_:
                has_unsuppressed_and = any(
                    isinstance(alt, And) and diag not in alt.suppress_warnings_
                    for alt in self.exprs
                )
                if has_unsuppressed_and:
                    warnings.warn(
                        "warn_multiple_tokens_in_named_alternation:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        " will return a list of all parsed tokens in an And alternative,"
                        " in prior versions only the first token was returned; enclose"
                        " contained argument in Group",
                        stacklevel=3,
                    )

        return super()._setResultsName(name, list_all_matches)

4932

4933

class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example:

    .. testcode::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
        )

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: 'BLACK'
        - posn: ['100', ',', '120']
          - x: '100'
          - y: '120'
        - shape: 'SQUARE'
        ...

        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
         'posn:', ['50', ',', '80']]
        - color: 'BLUE'
        - posn: ['50', ',', '80']
          - x: '50'
          - y: '80'
        - shape: 'CIRCLE'
        - size: '50'
        ...

        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
         'posn:', ['20', ',', '40']]
        - color: 'GREEN'
        - posn: ['20', ',', '40']
          - x: '20'
          - y: '40'
        - shape: 'TRIANGLE'
        - size: '20'
        ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        """Store the sub-expressions and set the flags used while parsing.

        The expression may return empty only if every sub-expression may
        (or there are none).
        """
        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self._may_return_empty = True
        self.skipWhitespace = True
        # the required/optional groupings are built lazily on the first parseImpl call
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``&=``: append ``other`` as one more sub-expression.

        A string operand is first wrapped with ``self._literalStringClass``.
        Returns ``NotImplemented`` when the operand is not a ``ParserElement``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Each([self, other])

    def streamline(self) -> ParserElement:
        """Streamline via the base class, then recompute ``_may_return_empty``."""
        super().streamline()
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self._may_return_empty = True
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match all sub-expressions in whatever order the input presents them.

        The sub-expressions are probed repeatedly with ``try_parse`` to find
        an order in which they match; the expressions are then parsed for
        real, in that order, starting again from ``loc``.

        Raises a ``ParseFatalException`` collected during the final probing
        round, or a ``ParseException`` naming the required elements that
        were never matched.
        """
        if self.initExprGroups:
            # one-time classification of the sub-expressions
            # maps id(wrapped expr) -> its Opt wrapper, so the wrapper can be
            # recovered after the bare wrapped expression has matched
            self.opt1map = dict(
                (id(e.expr), e) for e in self.exprs if isinstance(e, Opt)
            )
            # expressions wrapped in Opt (the bare inner expressions)
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            # other expressions that may match empty
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            # inner expressions of every _MultipleMatch, named to accumulate all matches
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            # inner expressions of every OneOrMore; these must match at least once
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        # work on copies so the cached groupings survive for later calls
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        multis = self.multioptionals[:]
        # expressions in the order they were found to match
        matchOrder: list[ParserElement] = []

        keepMatching = True
        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + multis
            failed.clear()
            fatals.clear()
            for e in tmpExprs:
                try:
                    tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parser_element = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the Opt wrapper when e is a wrapped expression
                    matchOrder.append(self.opt1map.get(id(e), e))
                    # required/optional expressions are consumed once matched;
                    # entries in multis stay available for further rounds
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a full round makes no progress
            if len(failed) == len(tmpExprs):
                keepMatching = False

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest fatal first; a tie between the top two is broken by
                # the longer str() of the failing element
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if tmpReqd:
            missing = ", ".join([str(e) for e in tmpReqd])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt]

        # second pass: parse for real, in the discovered order, from the original loc
        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        """Return the sub-expressions' string forms joined by ``' & '`` inside braces."""
        return f"{{{' & '.join(str(e) for e in self.exprs)}}}"

5117

5118

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        """Wrap ``expr``, converting a string to a parser element first.

        When ``expr`` is not ``None``, its index-error, empty-match,
        whitespace, list-saving, pre-parse and ignore settings are copied
        onto this wrapper.
        """
        super().__init__(savelist)
        if isinstance(expr, str_type):
            text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(text)
            else:
                expr = literal_cls(Literal(text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the contained expression in a list, or an empty list if unset."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the contained expression.

        ``ParseSyntaxException`` passes through untouched.  Any other
        ``ParseBaseException`` has its falsy ``pstr`` / ``loc`` /
        ``parser_element`` filled in from this call before being re-raised.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as exc:
            exc.pstr = exc.pstr or instring
            exc.loc = exc.loc or loc
            exc.parser_element = exc.parser_element or self
            # a custom-named, non-Forward wrapper substitutes its own message
            use_own_msg = (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            )
            if use_own_msg:
                exc.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        a copy of the contained expression when ``recursive`` is true.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy so the original contained expression is not modified
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        a copy of the contained expression when ``recursive`` is true.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy so the original contained expression is not modified
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        already_registered = (
            isinstance(other, Suppress) and other in self.ignoreExprs
        )
        if already_registered:
            return self

        super().ignore(other)
        if self.expr is not None:
            # pass the most recently registered ignorable on to the contained expression
            self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then the contained expression, if any."""
        super().streamline()
        inner = self.expr
        if inner is not None:
            inner.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element is already on the path."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        path_with_self = parseElementList[:] + [self]
        if self.expr is None:
            return
        self.expr._checkRecursion(path_with_self)

    def validate(self, validateTrace=None) -> None:
        """Deprecated: warn, validate the contained expression, then check recursion."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        extended_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``ClassName:(contained expression)``."""
        cls_name = type(self).__name__
        return f"{cls_name}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5240

5241

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).

    Example:

    .. testcode::

        '''
        BNF:
        statement ::= assignment_stmt | if_stmt
        assignment_stmt ::= identifier '=' rvalue
        rvalue ::= identifier | integer
        if_stmt ::= 'if' bool_condition block
        block ::= ([indent] statement)...
        identifier ::= [A..Za..z]
        integer ::= [0..9]...
        bool_condition ::= 'TRUE' | 'FALSE'
        '''

        IF, TRUE, FALSE = Keyword.using_each("IF TRUE FALSE".split())

        statement = Forward()
        identifier = Char(alphas)
        integer = Word(nums).add_parse_action(lambda t: int(t[0]))
        rvalue = identifier | integer
        assignment_stmt = identifier + "=" + rvalue

        if_stmt = IF + (TRUE | FALSE) + IndentedBlock(statement)

        statement <<= Group(assignment_stmt | if_stmt)

        result = if_stmt.parse_string('''
            IF TRUE
                a = 1000
                b = 2000
                IF FALSE
                    z = 100
            ''')
        print(result.dump())

    .. testoutput::

        ['IF', 'TRUE', [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]]
        [0]:
          IF
        [1]:
          TRUE
        [2]:
          [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]
          [0]:
            ['a', '=', 1000]
          [1]:
            ['b', '=', 2000]
          [2]:
            ['IF', 'FALSE', [['z', '=', 100]]]
            [0]:
              IF
            [1]:
              FALSE
            [2]:
              [['z', '=', 100]]
              [0]:
                ['z', '=', 100]
    """

    class _Indent(Empty):
        """Zero-width check that the current column equals ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width check that the current column is greater than ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        """Wrap ``expr``; ``recursive`` enables nested blocks, ``grouped`` wraps the block in a Group."""
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; 1 for an outermost block
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Build a block grammar anchored at the current column and parse with it."""
        # skip to the next non-whitespace position using an Empty(); its
        # column is the indentation required of every line in this block
        anchor_loc = Empty().preParse(instring, loc)

        # fail fast: if self.expr does not match here this raises, and no
        # grammar needs to be built
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        line_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper = self._IndentGreater(indent_col)
            child_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            child_block.set_debug(self.debug)
            child_block.parent_anchor = indent_col
            line_expr += Opt(deeper + child_block)

        line_expr.set_name(f"inner {hex(id(line_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(line_expr)

        # after the block, optionally match a return to the parent's column
        # or the end of the input
        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        full_expr = body + Optional(trailing_undent)
        return full_expr.parseImpl(instring, anchor_loc, do_actions)

5365

5366

class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string(" 123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        """Wrap ``expr`` and turn off pre-parsing for this element."""
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression, but only at location 0."""
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

5386

5387

class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example:

    .. testcode::

        test = '''\
        BBB this line
        BBB and this line
          BBB but not this one
        A BBB and definitely not this one
        '''

        for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['BBB', ' this line']
        ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        """Wrap ``expr`` and turn off pre-parsing for this element."""
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression, but only when ``loc`` is in column 1."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

5422

5423

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example:

    .. testcode::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(
            label + Suppress(':')
            + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
        )

        attr_expr[1, ...].parse_string(
            "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        """Wrap ``expr``; a lookahead is always flagged as possibly returning empty."""
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Verify the contained expression matches at ``loc`` without consuming input."""
        # parse with self.expr._parse and then empty the token list of the
        # returned ParseResults; any results names defined inside the
        # lookahead expression stay on the results object
        _end, lookahead = self.expr._parse(instring, loc, do_actions=do_actions)
        del lookahead[:]

        # hand back the original location: nothing is consumed
        return loc, lookahead

5466

5467

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        """Wrap ``expr`` for lookbehind and work out how far back to look.

        For expressions with a known length (a string, ``Literal``,
        ``Keyword``, a bounded ``Word``/``CharsNotIn``, or a
        ``PositionToken``) the ``retreat`` argument is replaced by that
        length and ``self.exact`` is set.  Otherwise ``retreat`` is kept as
        the maximum lookbehind window.
        """
        super().__init__(expr)
        # calling the expression yields a copy, so leave_whitespace does not
        # affect the caller's expression
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False
        self.exact = False
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the token list of whatever the lookbehind matched
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Verify the contained expression matches immediately before ``loc``.

        Returns ``loc`` unchanged together with the lookbehind results.
        Raises a ``ParseException`` when there is not enough preceding text
        (exact mode), or the last parse error seen when no start position
        within the lookbehind window matches.
        """
        if self.exact:
            # fixed-length lookbehind: a single attempt, exactly `retreat` back
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        test_expr = self.expr + StringEnd()
        instring_slice = instring[max(0, loc - self.retreat) : loc]
        window = len(instring_slice)
        last_exc: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )

        # Try each start position inside the window, nearest first.  The
        # offset never exceeds the window length, so the start index stays
        # non-negative and cannot wrap around to the end of the slice.
        for offset in range(1, window + 1):
            try:
                _, ret = test_expr._parse(instring_slice, window - offset)
            except ParseBaseException as pbe:
                last_exc = pbe
            else:
                break
        else:
            raise last_exc

        return loc, ret

5548

5549

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression and wrap its tokens as ``[start, tokens, end]``."""
        begin = loc
        end, matched = self.expr._parse(
            instring, begin, do_actions, callPreParse=False
        )
        located = ParseResults([begin, matched, end])
        for key, value in (
            ("locn_start", begin),
            ("value", matched),
            ("locn_end", end),
        ):
            located[key] = value

        if self.resultsName:
            # must return as a list, so that the name will be attached to the complete group
            return end, [located]
        return end, located

5593

5594

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        """Wrap ``expr`` as a negative lookahead."""
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Succeed with no tokens unless the contained expression can parse at ``loc``."""
        if not self.expr.can_parse_next(instring, loc, do_actions=do_actions):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self) -> str:
        """Return the contained expression in braces, prefixed with ``~``."""
        return "~{" + str(self.expr) + "}"

5638

5639

class _MultipleMatch(ParseElementEnhance):
    """Repeated matching of one expression, with an optional stop sentinel."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        """Wrap ``expr`` for repetition.

        ``stop_on`` (or the deprecated ``stopOn`` keyword, which takes
        precedence when truthy) names an expression that ends the repetition.
        """
        legacy_stop: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr)
        self.saveAsList = True
        ender = legacy_stop or stop_on
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.stopOn(ender)

    def stop_on(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that stops the repetition."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    stopOn = stop_on

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expression once, then as many more times as possible.

        The first match is mandatory and its failure propagates.  A
        ``ParseException`` or ``IndexError`` on a later repetition, or from
        the sentinel check before it, simply ends the repetition.
        """
        parse_inner = self.expr._parse
        skip_ignorables = self._skipIgnorables
        # probe that raises when the stop sentinel is next, or None if unset
        reject_ender = None
        if self.not_ender is not None:
            reject_ender = self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_ender is not None:
            reject_ender(instring, loc)
        loc, tokens = parse_inner(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_ender is not None:
                    reject_ender(instring, loc)
                next_loc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_inner(instring, next_loc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # no further repetition is possible; keep what has been collected
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Attach a results name, warning once if a contained expression is also named."""
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
            if diag not in self.suppress_warnings_:
                for inner in [self.expr] + self.expr.recurse():
                    collides = (
                        isinstance(inner, ParserElement)
                        and inner.resultsName
                        and diag not in inner.suppress_warnings_
                    )
                    if not collides:
                        continue
                    warnings.warn(
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {inner.resultsName!r} on contained expression",
                        stacklevel=3,
                    )
                    break

        return super()._setResultsName(name, list_all_matches)

5720

5721

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example:

    .. doctest::

       >>> data_word = Word(alphas)
       >>> label = data_word + FollowedBy(':')
       >>> attr_expr = Group(
       ...     label + Suppress(':')
       ...     + OneOrMore(data_word).set_parse_action(' '.join))

       >>> text = "shape: SQUARE posn: upper left color: BLACK"

       # Fail! read 'posn' as data instead of next label
       >>> attr_expr[1, ...].parse_string(text).pprint()
       [['shape', 'SQUARE posn']]

       # use stop_on attribute for OneOrMore
       # to avoid reading label string as part of the data
       >>> attr_expr = Group(
       ...     label + Suppress(':')
       ...     + OneOrMore(
       ...         data_word, stop_on=label).set_parse_action(' '.join))
       >>> OneOrMore(attr_expr).parse_string(text).pprint() # Better
       [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

       # could also be written as
       >>> (attr_expr * (1,)).parse_string(text).pprint()
       [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        """Default name: the repeated expression in braces, followed by ``...``."""
        return "{" + str(self.expr) + "}..."

5765

5766

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        """
        :param expr: expression to be matched zero or more times
        :param stop_on: (default= ``None``) terminating sentinel expression
        :param kwargs: may carry the pre-PEP8 ``stopOn`` keyword, which is
           looked up through ``deprecate_argument``
        """
        legacy_stop_on: Union[ParserElement, str] = deprecate_argument(
            kwargs, "stopOn", None
        )
        # a (truthy) value given under the old keyword takes precedence
        sentinel = legacy_stop_on or stop_on

        super().__init__(expr, stop_on=sentinel)
        # zero repetitions is a valid match
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Run the one-or-more match of the base class; if that fails, succeed
        anyway with an empty result at the original location.
        """
        try:
            outcome = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        """Default name: the repeated expression in brackets, followed by ``...``."""
        return "[" + str(self.expr) + "]..."

5800

5801

class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    ``min`` and ``max`` bound the number of list items; ``min`` defaults to 1
    and ``max`` defaults to no upper limit.

    Example:

    .. doctest::

       >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
       ParseResults(['aa', 'bb', 'cc'], {})
       >>> DelimitedList(Word(hexnums), delim=':', combine=True
       ...     ).parse_string("AA:BB:CC:DD:EE")
       ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        """
        :param expr: expression for a single list item; a string is converted
           using the default literal class
        :param delim: delimiter between items (default= ``","``)
        :param combine: if ``True``, return the whole list, delimiters
           included, as one string token
        :param min: minimum number of items (default= ``None``, meaning 1)
        :param max: maximum number of items (default= ``None``, meaning no
           limit)
        :param allow_trailing_delim: if ``True``, a delimiter may follow the
           last item
        :raises ValueError: if ``min`` is less than 1, or ``max`` is less
           than the (effective) minimum
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None and min < 1:
            raise ValueError("min must be greater than 0")

        # An omitted min means "at least one item", so max has to be checked
        # against that effective minimum as well; otherwise e.g. max=0 would
        # only fail later, when the repetition below is built with a
        # negative upper bound.
        effective_min = min or 1
        if max is not None and max < effective_min:
            raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        # kept in its original text form for use in the default name
        self.raw_delim = str(delim)
        self.delim = delim
        self.combine = combine
        if not combine:
            # delimiters are only kept in the output when combining
            self.delim = Suppress(delim)
        self.min = effective_min
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # first item, followed by (min-1 .. max-1) delimiter+item pairs
        delim_list_expr = self.content + (self.delim + self.content) * (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Default name of the form ``item [delim item]...``."""
        content_expr = self.content.streamline()
        return f"{content_expr} [{self.raw_delim} {content_expr}]..."

5873

5874

5875class _NullToken:

5876 def __bool__(self):

5877 return False

5878

5879 def __str__(self):

5880 return ""

5881

5882

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    :param expr: expression that may match zero or one time
    :param default: (optional) - value to be returned
       if the optional expression is not found.

    Example:

    .. testcode::

       # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
       zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
       zip.run_tests('''
           # traditional ZIP code
           12345

           # ZIP+4 form
           12101-0001

           # invalid ZIP
           98765-
           ''')

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # traditional ZIP code
       12345
       ['12345']

       # ZIP+4 form
       12101-0001
       ['12101-0001']

       # invalid ZIP
       98765-
       98765-
            ^
       ParseException: Expected end of text, found '-' (at char 5), (line:1, col:6)
       FAIL: Expected end of text, found '-' (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default was supplied" - lets None be a real default
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        self.defaultValue = default
        # not matching at all is a valid outcome
        self._may_return_empty = True
        # mirror the contained expression's list-saving behavior
        self.saveAsList = self.expr.saveAsList

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Try the contained expression; if it does not match, succeed at the
        same location with the default value (if one was given) or with no
        tokens.
        """
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif inner.resultsName:
                # make the default reachable under the inner results name too
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Default name: the contained expression's name in brackets."""
        text = str(self.expr)
        # strip off redundant inner {}'s
        while len(text) > 1 and text.startswith("{") and text.endswith("}"):
            text = text[1:-1]
        return "[" + text + "]"


Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param expr: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
       (the skipped text and target expression are returned
       as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
       (typically quoted strings and comments)
       that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
       are not allowed to be included in the skipped test;
       if found before the target expression is found,
       the :class:`SkipTo` is not a match

    Example:

    .. testcode::

       report = '''
           Outstanding Issues Report - 1 Jan 2000

              # | Severity | Description                               |  Days Open
           -----+----------+-------------------------------------------+-----------
            101 | Critical | Intermittent system crash                 |          6
             94 | Cosmetic | Spelling error on Login ('log|n')         |         14
             79 | Minor    | System slow when running too many reports |         47
           '''
       integer = Word(nums)
       SEP = Suppress('|')
       # use SkipTo to simply match everything up until the next SEP
       # - ignore quoted strings, so that a '|' character inside a quoted string does not match
       # - parse action will call token.strip() for each matched token, i.e., the description body
       string_data = SkipTo(SEP, ignore=quoted_string)
       string_data.set_parse_action(token_map(str.strip))
       ticket_expr = (integer("issue_num") + SEP
                     + string_data("sev") + SEP
                     + string_data("desc") + SEP
                     + integer("days_open"))

       for tkt in ticket_expr.search_string(report):
           print(tkt.dump())

    prints:

    .. testoutput::

       ['101', 'Critical', 'Intermittent system crash', '6']
       - days_open: '6'
       - desc: 'Intermittent system crash'
       - issue_num: '101'
       - sev: 'Critical'
       ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
       - days_open: '14'
       - desc: "Spelling error on Login ('log|n')"
       - issue_num: '94'
       - sev: 'Cosmetic'
       ['79', 'Minor', 'System slow when running too many reports', '47']
       - days_open: '47'
       - desc: 'System slow when running too many reports'
       - issue_num: '79'
       - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        # pre-PEP8 spelling of fail_on, looked up through deprecate_argument
        failOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "failOn", None
        )

        super().__init__(other)
        # a (truthy) value given under the old keyword takes precedence
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        # nothing may need to be skipped, so an empty match is possible
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper element whose only job is to carry the ignore expressions
        # that are stepped over while scanning for the target
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.

        :returns: ``self``, so that calls can be chained
        """
        super().ignore(expr)
        self._update_ignorer()
        # return self so that ignore() can be chained like the other
        # configuration methods (previously this implicitly returned None)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """
        Scan forward from ``loc`` one character at a time until the target
        expression matches, and return the text that was skipped.

        :returns: ``(loc, results)`` - ``results`` holds the skipped text,
           followed by the target's tokens when ``include`` was set
        :raises ParseException: if the end of the input is reached without
           the target expression matching
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while/else failure
                # branch below, so the text up to the fail_on match is
                # returned as the skipped text - confirm that this is the
                # intended meaning of "not a match" in the class docstring.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                # probe only: parse actions are not run while searching
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honoring do_actions
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

6133

6134

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    instance using the ``'<<'`` operator.

    .. Note::

       Take care when assigning to ``Forward`` not to overlook
       precedence of operators.

       Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

          fwd_expr << a | b | c

       will actually be evaluated as::

          (fwd_expr << a) | b | c

       thereby leaving b and c out as parseable alternatives.
       It is recommended that you explicitly group the values
       inserted into the :class:`Forward`::

          fwd_expr << (a | b | c)

       Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :meth:`ParseResults.pprint` for an example of a recursive
    parser created using :class:`Forward`.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember where this Forward was created, so that __del__ can point
        # its warning at that line if no expression is ever attached
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        # source line of the most recent '<<', used by __or__ to detect
        # "fwd << a | b" written on a single line
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """
        Attach ``other`` as the expression this ``Forward`` stands for, and
        copy its parsing settings onto this element.

        :param other: the expression to attach; a string is converted using
           the default literal class
        :returns: ``self``, or ``NotImplemented`` if ``other`` is not a
           parser expression
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Only now that an expression is really being attached is the
        # creation-site record dropped. Dropping it before the check above
        # would leave a rejected operand with expr still None and no
        # caller_frame for __del__ to report.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """In-place form of ``<<``; defers to :meth:`__lshift__` for parser expressions."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """
        Same as the base class ``|``, but first warns (if the
        ``warn_on_match_first_with_lshift_operator`` diagnostic is enabled)
        when it is used on the same source line as a ``<<`` on this object.
        """
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        return super().__or__(other)

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # a finalizer must not raise: without a recorded creation site
            # there is nothing useful to report
            caller_frame = getattr(self, "caller_frame", None)
            if caller_frame is None:
                return
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=caller_frame.filename,
                lineno=caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Parse using the attached expression. When left recursion support is
        enabled, the bounded recursion algorithm described in the comments
        below is used instead of a plain delegation to the base class.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class.

        Only this element's own flag is changed; ``recursive`` is accepted
        but not used here.
        """
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class.

        Only this element's own flag is changed; ``recursive`` is accepted
        but not used here.
        """
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the attached expression once; the flag is set first so that a self-reference ends the recursion."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then runs the legacy recursion check."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Default name: the class name followed by (at most 1000 characters of) the attached expression's string form."""
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        if self.expr is not None:
            return super().copy()
        else:
            # nothing attached yet: return a new Forward that refers to this
            # one, so that a later '<<=' on the original is seen by the copy
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Assign a results name, warning first (if the diagnostic is enabled) when no expression is attached yet."""
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

6416

6417

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        """
        :param expr: the expression whose results are to be converted
        :param savelist: accepted, but not passed on to the base class
        """
        super().__init__(expr)
        # converters start out not saving results as a list; subclasses
        # that need it set this flag themselves
        self.saveAsList = False

6426

6427

class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example:

    .. doctest::

       >>> real = Word(nums) + '.' + Word(nums)
       >>> print(real.parse_string('3.1416'))
       ['3', '.', '1416']

       >>> # will also erroneously match the following
       >>> print(real.parse_string('3. 1416'))
       ['3', '.', '1416']

       >>> real = Combine(Word(nums) + '.' + Word(nums))
       >>> print(real.parse_string('3.1416'))
       ['3.1416']

       >>> # no match when there are internal spaces
       >>> print(real.parse_string('3. 1416'))
       Traceback (most recent call last):
       ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        """
        :param expr: expression whose tokens are to be concatenated
        :param join_string: (default= ``""``) join string handed to the
           token-to-string conversion
        :param adjacent: (default= ``True``) require the matched pieces to be
           contiguous in the input
        :param joinString: pre-PEP8 spelling of ``join_string``; used instead
           of it when not ``None``
        """
        super().__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = join_string if joinString is None else joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            # when pieces must be adjacent, use the ParserElement
            # implementation directly instead of the inherited override
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single concatenated string token."""
        combined = "".join(tokenlist._asStringList(self.joinString))

        # start from a copy, so that whatever else the results carry is kept
        # while the individual tokens are replaced
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([combined], modal=self.modalResults)

        return [retToks] if self.resultsName and retToks.haskeys() else retToks

6497

6498

class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example:

    .. doctest::

       >>> ident = Word(alphas)
       >>> num = Word(nums)
       >>> term = ident | num
       >>> func = ident + Opt(DelimitedList(term))
       >>> print(func.parse_string("fn a, b, 100"))
       ['fn', 'a', 'b', '100']

       >>> func = ident + Group(Opt(DelimitedList(term)))
       >>> print(func.parse_string("fn a, b, 100"))
       ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper, as a plain list if ``aslist`` was set."""
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.as_list()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)

6536

6537

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as an item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example:

    .. doctest::

       >>> data_word = Word(alphas)
       >>> label = data_word + FollowedBy(':')

       >>> attr_expr = (
       ...     label + Suppress(':')
       ...     + OneOrMore(data_word, stop_on=label)
       ...     .set_parse_action(' '.join)
       ... )

       >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

       >>> # print attributes as plain groups
       >>> print(attr_expr[1, ...].parse_string(text).dump())
       ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

       # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
       # Dict will auto-assign names.
       >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
       >>> print(result.dump())
       [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
       - color: 'light blue'
       - posn: 'upper left'
       - shape: 'SQUARE'
       - texture: 'burlap'
       [0]:
         ['shape', 'SQUARE']
       [1]:
         ['posn', 'upper left']
       [2]:
         ['color', 'light blue']
       [3]:
         ['texture', 'burlap']

       # access named fields as dict entries, or output as dict
       >>> print(result['shape'])
       SQUARE
       >>> print(result.as_dict())
       {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """
        Register each non-empty entry of ``tokenlist`` under its first
        token, used as the key.

        :raises TypeError: if an entry with more than two items cannot be
           copied
        """
        for position, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if len(entry) == 1:
                # key only: the value is an empty string
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()
                except Exception:
                    exc = TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    )
                    raise exc from None

                del remainder[0]

                # a single remaining item without names of its own is
                # stored bare; anything else is stored as the remainder
                if len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                ):
                    value = remainder
                else:
                    value = remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        converted = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [converted] if self.resultsName else converted

6636

6637

class Suppress(TokenConverter):
    """Converter that discards the tokens matched by the wrapped expression.

    Example:

    .. doctest::

       >>> source = "a, b, c,d"
       >>> wd = Word(alphas)
       >>> wd_list1 = wd + (',' + wd)[...]
       >>> print(wd_list1.parse_string(source))
       ['a', ',', 'b', ',', 'c', ',', 'd']

       # often, delimiters that are useful during parsing are just in the
       # way afterward - use Suppress to keep them out of the parsed output
       >>> wd_list2 = wd + (Suppress(',') + wd)[...]
       >>> print(wd_list2.parse_string(source))
       ['a', 'b', 'c', 'd']

       # Skipped text (using '...') can be suppressed as well
       >>> source = "lead in START relevant text END trailing text"
       >>> start_marker = Keyword("START")
       >>> end_marker = Keyword("END")
       >>> find_body = Suppress(...) + start_marker + ... + end_marker
       >>> print(find_body.parse_string(source))
       ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` (a literal Ellipsis) cannot know yet what to skip
        # to, so a placeholder is wrapped until ``+`` or ``-`` supplies the
        # target expression.
        # NOTE: ``savelist`` is accepted but not used by this initializer.
        wrapped = _PendingSkip(NoMatch()) if expr is ... else expr
        super().__init__(wrapped)

    def __add__(self, other) -> ParserElement:
        """Combine with *other*; a pending skip becomes a suppressed
        ``SkipTo(other)`` followed by *other*."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        """Same as :meth:`__add__`, but using the ``-`` operator."""
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Drop whatever was matched by returning an empty token list."""
        return []

    def suppress(self) -> ParserElement:
        """Already suppressed - return this same instance."""
        return self

6690

6691

6692# XXX: Example needs to be re-done for updated output

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator that logs every call of a parse action to ``sys.stderr``.

    On entry the wrapper writes
    ``">>entering method-name(line: <current_source_line>, <parse_location>, <matched_tokens>)"``.
    On exit it writes ``"<<leaving method-name"`` followed by either the
    returned value or the type and message of the exception that the parse
    action raised (the exception is then re-raised).

    Example:

    .. testsetup:: stderr

       import sys
       sys.stderr = sys.stdout

    .. testcleanup:: stderr

       sys.stderr = sys.__stderr__

    .. testcode:: stderr

       wd = Word(alphas)

       @trace_parse_action
       def remove_duplicate_chars(tokens):
           return ''.join(sorted(set(''.join(tokens))))

       wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
       print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
       :options: +NORMALIZE_WHITESPACE

       >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
       0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
       <<leaving remove_duplicate_chars (ret: 'dfjkls')
       ['dfjkls']

    .. versionchanged:: 3.1.0
       Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # The trailing three positional arguments are (string, location,
        # tokens); a fourth, leading argument is taken to be the object
        # that the parse action is bound to.
        base_name = f.__name__
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            thisFunc = f"{type(paArgs[0]).__name__}.{base_name}"
        else:
            thisFunc = base_name

        log = sys.stderr.write
        log(f">>entering {thisFunc}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            ret = f(*paArgs)
        except Exception as exc:
            log(f"<<leaving {thisFunc} (exception: {type(exc).__name__}: {exc})\n")
            raise
        else:
            log(f"<<leaving {thisFunc} (ret: {ret!r})\n")
            return ret

    z.__name__ = f.__name__
    return z

6756

6757

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Mini-grammar used by srange() to parse regex-style "[...]" character sets.

# A backslash followed by one punctuation character (or a space); the parse
# action keeps only the character after the backslash.
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# A hex escape, ``\x##`` or the legacy ``\0x##`` (``x`` may be upper case).
# The digits are isolated by splitting once on the first ``x``/``X``.
# str.lstrip(r"\0x") must not be used here: its argument is a *set* of
# characters, so it would also strip leading ``0`` hex digits (turning
# ``\x00`` into an empty string) and would leave an upper-case ``X`` in place.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(re.split(r"[xX]", t[0], maxsplit=1)[1], 16))
)
# An octal escape, ``\0`` followed by octal digits; everything after the
# backslash is read as a base-8 number.
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# Any one of the escapes above, or a single character other than ``\`` or ``]``.
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range: grouped so srange() can tell it apart from a lone char.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# Whole bracket expression: "[", optional "^" (named "negate"), the body, "]".
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6784

6785

def srange(s: str) -> str:
    r"""Expand a regex-style ``'[]'`` character-set string into the string
    of all characters it denotes, for use in :class:`Word` construction::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s.  Between the brackets,
    the following are accepted:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, an empty string is
    returned instead of raising.
    """

    def _expanded(p):
        # A lone character arrives as a plain value; a range arrives as a
        # ParseResults holding its two endpoints.
        if not isinstance(p, ParseResults):
            yield p
            return
        first, last = ord(p[0]), ord(p[1])
        for code in range(first, last + 1):
            yield chr(code)

    try:
        body = _reBracketExpr.parse_string(s).body
        chars = []
        for part in body:
            chars.extend(_expanded(part))
        return "".join(chars)
    except Exception:
        return ""

6825

6826

def token_map(func, *args) -> ParseAction:
    """Build a parse action that applies *func* to every token of a
    :class:`ParseResults` list and returns the list of results.  Any extra
    positional *args* are passed to *func* after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed text to an integer using base 16.

    The returned parse action takes its ``__name__`` from *func* (or from
    the class of *func* when it has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # Callables without a __name__ fall back to the name of their class.
    fallback_name = getattr(func, "__class__").__name__
    pa.__name__ = getattr(func, "__name__", fallback_name)

    return pa

6871

6872

def autoname_elements() -> None:
    """
    Name every still-unnamed :class:`ParserElement` found among the
    caller's local variables after the variable that holds it.  Intended
    to simplify mass-naming of parser elements when generating railroad
    diagrams with named subdiagrams.

    Does nothing if the running Python does not provide ``sys._getframe``.
    """

    # not every Python implementation provides sys._getframe
    frame_getter = getattr(sys, "_getframe", None)
    caller = frame_getter(1) if frame_getter is not None else None
    if caller is None:
        return

    caller = typing.cast(types.FrameType, caller)
    for var_name, value in caller.f_locals.items():
        if not isinstance(value, ParserElement):
            continue
        # leave names that were assigned explicitly untouched
        if value.customName:
            continue
        value.set_name(var_name)

6891

6892

# Quoted-string expressions.  In the single-line forms, the body regex
# accepts any character other than the quote, CR, LF or backslash; a doubled
# quote character; or a backslash escape (``\x`` must be followed by hex
# digits).  The closing quote is matched as a separate literal.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either of the two forms above
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
# Python-style string literals: triple-quoted forms first, then the
# single-line forms.  Unlike the expressions above, the single-line bodies
# here accept a backslash-escaped quote rather than a doubled quote.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

# a quoted string prefixed with "u"
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets expanded by srange() from hex-escaped ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6932

6933# build list of built-in expressions, for future reference if a global default value

6934# gets updated

6935_builtin_exprs: list[ParserElement] = [

6936 v for v in vars().values() if isinstance(v, ParserElement)

6937]

6938

6939# Compatibility synonyms

6940# fmt: off

6941sglQuotedString = sgl_quoted_string

6942dblQuotedString = dbl_quoted_string

6943quotedString = quoted_string

6944unicodeString = unicode_string

6945lineStart = line_start

6946lineEnd = line_end

6947stringStart = string_start

6948stringEnd = string_end

6949nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)

6950traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)

6951conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)

6952tokenMap = replaced_by_pep8("tokenMap", token_map)

6953# fmt: on