Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 46%

2# core.py

4from __future__ import annotations

6import collections.abc

7from collections import deque

8import os

9import typing

10from typing import (

11 Any,

12 Callable,

13 Generator,

14 NamedTuple,

15 Sequence,

16 TextIO,

17 Union,

18 cast,

19)

20from abc import ABC, abstractmethod

21from enum import Enum

22import string

23import copy

24import warnings

25import re

26import sys

27from collections.abc import Iterable

28import traceback

29import types

30from operator import itemgetter

31from functools import wraps

32from threading import RLock

33from pathlib import Path

35from .warnings import PyparsingDeprecationWarning, PyparsingDiagnosticWarning

36from .util import (

37 _FifoCache,

38 _UnboundedCache,

39 __config_flags,

40 _collapse_string_to_ranges,

41 _convert_escaped_numerics_to_char,

42 _escape_regex_range_chars,

43 _flatten,

44 LRUMemo as _LRUMemo,

45 UnboundedMemo as _UnboundedMemo,

46 deprecate_argument,

47 replaced_by_pep8,

48)

49from .exceptions import *

50from .actions import *

51from .results import ParseResults, _ParseResultsWithOffset

52from .unicode import pyparsing_unicode

54_MAX_INT = sys.maxsize

55str_type: tuple[type, ...] = (str, bytes)

57#

59#

60# Permission is hereby granted, free of charge, to any person obtaining

61# a copy of this software and associated documentation files (the

62# "Software"), to deal in the Software without restriction, including

63# without limitation the rights to use, copy, modify, merge, publish,

64# distribute, sublicense, and/or sell copies of the Software, and to

65# permit persons to whom the Software is furnished to do so, subject to

66# the following conditions:

67#

68# The above copyright notice and this permission notice shall be

69# included in all copies or substantial portions of the Software.

70#

71# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

72# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

73# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

74# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

75# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

76# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

77# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

78#

80from functools import cached_property

class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # label for this family of flags
    # NOTE(review): presumably used by __config_flags when reporting on a flag - confirm
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # Snapshot of the public flag names bound so far in this class body. This must
    # stay below the last flag definition: locals() only holds names already assigned.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # NOTE(review): presumably the flags that __config_flags refuses to modify - confirm
    _fixed_names = """
        collect_all_And_tokens
        """.split()

104

105

class __diag__(__config_flags):
    """
    Storage for the global diagnostic flags; every flag defaults to ``False``.

    The flag names mirror the members of :class:`Diagnostics`, and the module-level
    functions ``enable_diag``, ``disable_diag`` and ``enable_all_warnings`` toggle
    them through this class.
    """

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # Snapshot of the public flag names bound so far in this class body. This must
    # stay below the last flag definition: locals() only holds names already assigned.
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # flags selected purely by name prefix: "warn..." and "enable_debug..."
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Enable every flag whose name starts with ``warn``."""
        for name in cls._warning_names:
            cls.enable(name)

126

127

class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - warning flag that was missing from this
      list; judging by its name it concerns using the ``'<<'`` operator together with a
      :class:`MatchFirst` expression (TODO: confirm the exact trigger)
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.
    """

    # member names (not the integer values) are what enable_diag/disable_diag pass on
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

160

161

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch on one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member whose name identifies the flag
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

167

168

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Switch off one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member whose name identifies the flag
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

174

175

def enable_all_warnings() -> None:
    """
    Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).

    Delegates to ``__diag__.enable_all_warnings()``.
    """
    __diag__.enable_all_warnings()

181

182

# hide abstract class: the flag classes above have already been created from it,
# so the base-class name is removed from this module's namespace
del __config_flags

185

186

187def _should_enable_warnings(

188 cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]

189) -> bool:

190 enable = bool(warn_env_var)

191 for warn_opt in cmd_line_warn_options:

192 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split(

193 ":"

194 )[:5]

195 if not w_action.lower().startswith("i") and (

196 not (w_message or w_category or w_module) or w_module == "pyparsing"

197 ):

198 enable = True

199 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""):

200 enable = False

201 return enable

202

203

# At import time, turn on every diagnostic warning if the interpreter's warning
# options or the PYPARSINGENABLEALLWARNINGS environment variable ask for it.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()


# build list of single arg builtins, that can be used as parse actions
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

_generatorType = types.GeneratorType
# what parseImpl returns: (location, tokens)
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# a condition callable may accept any trailing subset of (s, loc, toks) and returns a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: (instring, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug callbacks; the trailing bool of each is the cache_hit flag
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]


# character-class strings, typically used to build Word-style expressions
alphas: str = string.ascii_uppercase + string.ascii_lowercase
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])

242

243

244class _ParseActionIndexError(Exception):

245 """

246 Internal wrapper around IndexError so that IndexErrors raised inside

247 parse actions aren't misinterpreted as IndexErrors raised inside

248 ParserElement parseImpl methods.

249 """

250

251 def __init__(self, msg: str, exc: BaseException) -> None:

252 self.msg: str = msg

253 self.exc: BaseException = exc

254

255

# Lazily-filled caches shared by every _trim_arity call:
# - _trim_arity_call_line: stack entry captured on the first call to _trim_arity
# - pa_call_line_synth: (filename, line number) of the call to the user's function
#   inside wrapper(), derived from that entry plus LINE_DIFF
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()


def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    The returned wrapper is called with the full argument list and discovers by
    trial how many leading arguments ``func`` does not accept: it first calls
    ``func(*args)``, and each time that call itself raises a TypeError it drops
    one more leading argument and retries, up to ``max_limit`` dropped arguments.
    Once a call succeeds the number of dropped arguments is remembered and used
    directly on later calls.

    A TypeError counts as an arity mismatch only when the traceback shows it was
    raised at the call line inside the wrapper rather than deeper inside ``func``;
    any other TypeError is re-raised. An IndexError raised by ``func`` is
    re-raised wrapped in :class:`_ParseActionIndexError`.

    Members of ``_single_arg_builtins`` are never probed; they are wrapped so that
    they receive only the third argument.

    :param func: the callable to adapt
    :param max_limit: the largest number of leading arguments that may be dropped
    :return: a wrapper carrying ``func``'s ``__name__`` and ``__doc__``
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    # True when the last of the (at most two) extracted entries is the
                    # probing call line itself, i.e. no frame inside func was entered
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    # drop the traceback reference once it has been inspected
                    del tb

                    if trim_arity_type_error:
                        if limit < max_limit:
                            # retry with one fewer leading argument
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

322

323

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a predicate returning ``True`` or ``False`` so that it can be used as a
    parse action. Useful wherever a parse action is required and
    :meth:`ParserElement.add_condition` cannot be used (such as when adding a condition
    to an operator level in :class:`infix_notation`).

    The returned parse action calls the predicate and raises when it yields a
    falsy value; otherwise it returns ``None``, leaving the tokens unchanged.

    Optional keyword arguments:

    :param message: define a custom message to be used in the raised exception
    :param fatal: if ``True``, will raise :class:`ParseFatalException`
                  to stop parsing immediately;
                  otherwise will raise :class:`ParseException`

    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message

    exc_type = ParseException
    if fatal:
        exc_type = ParseFatalException

    # let the predicate be written with anywhere from 0 to 3 arguments
    predicate = _trim_arity(fn)

    @wraps(predicate)
    def pa(s, l, t):
        passed = bool(predicate(s, l, t))
        if not passed:
            raise exc_type(s, l, msg)

    return pa

351

352

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "about to try a match" debug action.

    Prints three lines: the expression with its location (plus line and column
    numbers), the source line containing the location, and a caret
    right-aligned to the column. The first line starts with ``*`` when
    ``cache_hit`` is true.
    """
    if cache_hit:
        cache_hit_str = "*"
    else:
        cache_hit_str = ""

    header = f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n"
    source_line = f" {line(loc, instring)}\n"
    caret_line = f" {'^':>{col(loc, instring)}}"
    print(header + source_line + caret_line)

364

365

366def _default_success_debug_action(

367 instring: str,

368 startloc: int,

369 endloc: int,

370 expr: ParserElement,

371 toks: ParseResults,

372 cache_hit: bool = False,

373):

374 cache_hit_str = "*" if cache_hit else ""

375 print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}")

376

377

378def _default_exception_debug_action(

379 instring: str,

380 loc: int,

381 expr: ParserElement,

382 exc: Exception,

383 cache_hit: bool = False,

384):

385 cache_hit_str = "*" if cache_hit else ""

386 print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}")

387

388

def null_debug_action(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them all, and returns ``None``.
    """
    return None

391

392

393class ParserElement(ABC):

394 """Abstract base level parser element class."""

395

396 DEFAULT_WHITE_CHARS: str = " \n\t\r"

397 verbose_stacktrace: bool = False

398 _literalStringClass: type = None # type: ignore[assignment]

399

400 @staticmethod

401 def set_default_whitespace_chars(chars: str) -> None:

402 r"""

403 Overrides the default whitespace chars

404

405 Example:

406

407 .. doctest::

408

409 # default whitespace chars are space, <TAB> and newline

410 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")

411 ParseResults(['abc', 'def', 'ghi', 'jkl'], {})

412

413 # change to just treat newline as significant

414 >>> ParserElement.set_default_whitespace_chars(" \t")

415 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")

416 ParseResults(['abc', 'def'], {})

417

418 # Reset to default

419 >>> ParserElement.set_default_whitespace_chars(" \n\t\r")

420 """

421 ParserElement.DEFAULT_WHITE_CHARS = chars

422

423 # update whitespace all parse expressions defined in this module

424 for expr in _builtin_exprs:

425 if expr.copyDefaultWhiteChars:

426 expr.whiteChars = set(chars)

427

428 @staticmethod

    def inline_literals_using(cls: type) -> None:
        """
        Set class to be used for inclusion of string literals into a parser.

        The class is stored on :class:`ParserElement` as ``_literalStringClass``.
        Note that this is a static method, so ``cls`` is an ordinary argument
        (the class to install), not the class the method is called on.

        :param cls: the class to use for string literals

        Example:

        .. doctest::
            :options: +NORMALIZE_WHITESPACE

            # default literal class used is Literal
            >>> integer = Word(nums)
            >>> date_str = (
            ...     integer("year") + '/'
            ...     + integer("month") + '/'
            ...     + integer("day")
            ... )

            >>> date_str.parse_string("1999/12/31")
            ParseResults(['1999', '/', '12', '/', '31'],
            {'year': '1999', 'month': '12', 'day': '31'})

            # change to Suppress
            >>> ParserElement.inline_literals_using(Suppress)
            >>> date_str = (
            ...     integer("year") + '/'
            ...     + integer("month") + '/'
            ...     + integer("day")
            ... )

            >>> date_str.parse_string("1999/12/31")
            ParseResults(['1999', '12', '31'],
            {'year': '1999', 'month': '12', 'day': '31'})

            # Reset
            >>> ParserElement.inline_literals_using(Literal)
        """
        ParserElement._literalStringClass = cls

466

467 @classmethod

468 def using_each(cls, seq, **class_kwargs):

469 """

470 Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq.

471

472 Example:

473

474 .. testcode::

475

476 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

477

478 .. versionadded:: 3.1.0

479 """

480 yield from (cls(obj, **class_kwargs) for obj in seq)

481

    class DebugActions(NamedTuple):
        # Bundle of the three optional per-expression debug callbacks.
        # A None entry means that no custom callback is set for that event.
        debug_try: typing.Optional[DebugStartAction]  # called before a match is attempted
        debug_match: typing.Optional[DebugSuccessAction]  # presumably called after a successful match - confirm
        debug_fail: typing.Optional[DebugExceptionAction]  # called when matching raises an exception

486

487 def __init__(self, savelist: bool = False) -> None:

488 self.parseAction: list[ParseAction] = list()

489 self.failAction: typing.Optional[ParseFailAction] = None

490 self.customName: str = None # type: ignore[assignment]

491 self._defaultName: typing.Optional[str] = None

492 self.resultsName: str = None # type: ignore[assignment]

493 self.saveAsList: bool = savelist

494 self.skipWhitespace: bool = True

495 self.whiteChars: set[str] = set(ParserElement.DEFAULT_WHITE_CHARS)

496 self.copyDefaultWhiteChars: bool = True

497 # used when checking for left-recursion

498 self._may_return_empty: bool = False

499 self.keepTabs: bool = False

500 self.ignoreExprs: list[ParserElement] = list()

501 self.debug: bool = False

502 self.streamlined: bool = False

503 # optimize exception handling for subclasses that don't advance parse index

504 self.mayIndexError: bool = True

505 self.errmsg: Union[str, None] = ""

506 # mark results names as modal (report only last) or cumulative (list all)

507 self.modalResults: bool = True

508 # custom debug actions

509 self.debugActions = self.DebugActions(None, None, None)

510 # avoid redundant calls to preParse

511 self.callPreparse: bool = True

512 self.callDuringTry: bool = False

513 self.suppress_warnings_: list[Diagnostics] = []

514 self.show_in_diagram: bool = True

515

    @property
    def mayReturnEmpty(self) -> bool:
        """
        Read-through alias for ``_may_return_empty``.

        .. deprecated:: 3.3.0
            use _may_return_empty instead.
        """
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value) -> None:
        """
        Write-through alias for ``_may_return_empty``.

        .. deprecated:: 3.3.0
            use _may_return_empty instead.
        """
        self._may_return_empty = value

531

532 def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:

533 """

534 Suppress warnings emitted for a particular diagnostic on this expression.

535

536 Example:

537

538 .. doctest::

539

540 >>> label = pp.Word(pp.alphas)

541

542 # Normally using an empty Forward in a grammar

543 # would print a warning, but we can suppress that

544 >>> base = pp.Forward().suppress_warning(

545 ... pp.Diagnostics.warn_on_parse_using_empty_Forward)

546

547 >>> grammar = base | label

548 >>> print(grammar.parse_string("x"))

549 ['x']

550 """

551 self.suppress_warnings_.append(warning_type)

552 return self

553

554 def visit_all(self):

555 """General-purpose method to yield all expressions and sub-expressions

556 in a grammar. Typically just for internal use.

557 """

558 to_visit = deque([self])

559 seen = set()

560 while to_visit:

561 cur = to_visit.popleft()

562

563 # guard against looping forever through recursive grammars

564 if cur in seen:

565 continue

566 seen.add(cur)

567

568 to_visit.extend(cur.recurse())

569 yield cur

570

571 def copy(self) -> ParserElement:

572 """

573 Make a copy of this :class:`ParserElement`. Useful for defining

574 different parse actions for the same parsing pattern, using copies of

575 the original parse element.

576

577 Example:

578

579 .. testcode::

580

581 integer = Word(nums).set_parse_action(

582 lambda toks: int(toks[0]))

583 integerK = integer.copy().add_parse_action(

584 lambda toks: toks[0] * 1024) + Suppress("K")

585 integerM = integer.copy().add_parse_action(

586 lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

587

588 print(

589 (integerK | integerM | integer)[1, ...].parse_string(

590 "5K 100 640K 256M")

591 )

592

593 prints:

594

595 .. testoutput::

596

597 [5120, 100, 655360, 268435456]

598

599 Equivalent form of ``expr.copy()`` is just ``expr()``:

600

601 .. testcode::

602

603 integerM = integer().add_parse_action(

604 lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

605 """

606 cpy = copy.copy(self)

607 cpy.parseAction = self.parseAction[:]

608 cpy.ignoreExprs = self.ignoreExprs[:]

609 if self.copyDefaultWhiteChars:

610 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)

611 return cpy

612

613 def set_results_name(

614 self, name: str, list_all_matches: bool = False, **kwargs

615 ) -> ParserElement:

616 """

617 Define name for referencing matching tokens as a nested attribute

618 of the returned parse results.

619

620 Normally, results names are assigned as you would assign keys in a dict:

621 any existing value is overwritten by later values. If it is necessary to

622 keep all values captured for a particular results name, call ``set_results_name``

623 with ``list_all_matches`` = True.

624

625 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;

626 this is so that the client can define a basic element, such as an

627 integer, and reference it in multiple places with different names.

628

629 You can also set results names using the abbreviated syntax,

630 ``expr("name")`` in place of ``expr.set_results_name("name")``

631 - see :meth:`__call__`. If ``list_all_matches`` is required, use

632 ``expr("name*")``.

633

634 Example:

635

636 .. testcode::

637

638 integer = Word(nums)

639 date_str = (integer.set_results_name("year") + '/'

640 + integer.set_results_name("month") + '/'

641 + integer.set_results_name("day"))

642

643 # equivalent form:

644 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

645 """

646 listAllMatches: bool = deprecate_argument(kwargs, "listAllMatches", False)

647

648 list_all_matches = listAllMatches or list_all_matches

649 return self._setResultsName(name, list_all_matches)

650

651 def _setResultsName(self, name, list_all_matches=False) -> ParserElement:

652 if name is None:

653 return self

654 newself = self.copy()

655 if name.endswith("*"):

656 name = name[:-1]

657 list_all_matches = True

658 newself.resultsName = name

659 newself.modalResults = not list_all_matches

660 return newself

661

662 def set_break(self, break_flag: bool = True) -> ParserElement:

663 """

664 Method to invoke the Python pdb debugger when this element is

665 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to

666 disable.

667 """

668 if break_flag:

669 _parseMethod = self._parse

670

671 def breaker(instring, loc, do_actions=True, callPreParse=True):

672 # this call to breakpoint() is intentional, not a checkin error

673 breakpoint()

674 return _parseMethod(instring, loc, do_actions, callPreParse)

675

676 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined]

677 self._parse = breaker # type: ignore [method-assign]

678 elif hasattr(self._parse, "_originalParseMethod"):

679 self._parse = self._parse._originalParseMethod # type: ignore [method-assign]

680 return self

681

682 def set_parse_action(

683 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any

684 ) -> ParserElement:

685 """

686 Define one or more actions to perform when successfully matching parse element definition.

687

688 Parse actions can be called to perform data conversions, do extra validation,

689 update external data structures, or enhance or replace the parsed tokens.

690 Each parse action ``fn`` is a callable method with 0-3 arguments, called as

691 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

692

693 - ``s`` = the original string being parsed (see note below)

694 - ``loc`` = the location of the matching substring

695 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

696

697 The parsed tokens are passed to the parse action as ParseResults. They can be

698 modified in place using list-style append, extend, and pop operations to update

699 the parsed list elements; and with dictionary-style item set and del operations

700 to add, update, or remove any named results. If the tokens are modified in place,

701 it is not necessary to return them with a return statement.

702

703 Parse actions can also completely replace the given tokens, with another ``ParseResults``

704 object, or with some entirely different object (common for parse actions that perform data

705 conversions). A convenient way to build a new parse result is to define the values

706 using a dict, and then create the return value using :class:`ParseResults.from_dict`.

707

708 If None is passed as the ``fn`` parse action, all previously added parse actions for this

709 expression are cleared.

710

711 Optional keyword arguments:

712

713 :param call_during_try: (default= ``False``) indicate if parse action

714 should be run during lookaheads and alternate

715 testing. For parse actions that have side

716 effects, it is important to only call the parse

717 action once it is determined that it is being

718 called as part of a successful parse.

719 For parse actions that perform additional

720 validation, then ``call_during_try`` should

721 be passed as True, so that the validation code

722 is included in the preliminary "try" parses.

723

724 .. Note::

725 The default parsing behavior is to expand tabs in the input string

726 before starting the parsing process.

727 See :meth:`parse_string` for more information on parsing strings

728 containing ``<TAB>`` s, and suggested methods to maintain a

729 consistent view of the parsed string, the parse location, and

730 line and column positions within the parsed string.

731

732 Example: Parse dates in the form ``YYYY/MM/DD``

733 -----------------------------------------------

734

735 Setup code:

736

737 .. testcode::

738

739 def convert_to_int(toks):

740 '''a parse action to convert toks from str to int

741 at parse time'''

742 return int(toks[0])

743

744 def is_valid_date(instring, loc, toks):

745 '''a parse action to verify that the date is a valid date'''

746 from datetime import date

747 year, month, day = toks[::2]

748 try:

749 date(year, month, day)

750 except ValueError:

751 raise ParseException(instring, loc, "invalid date given")

752

753 integer = Word(nums)

754 date_str = integer + '/' + integer + '/' + integer

755

756 # add parse actions

757 integer.set_parse_action(convert_to_int)

758 date_str.set_parse_action(is_valid_date)

759

760 Successful parse - note that integer fields are converted to ints:

761

762 .. testcode::

763

764 print(date_str.parse_string("1999/12/31"))

765

766 prints:

767

768 .. testoutput::

769

770 [1999, '/', 12, '/', 31]

771

772 Failure - invalid date:

773

774 .. testcode::

775

776 date_str.parse_string("1999/13/31")

777

778 prints:

779

780 .. testoutput::

781

782 Traceback (most recent call last):

783 ParseException: invalid date given, found '1999' ...

784 """

785 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

786

787 if list(fns) == [None]:

788 self.parseAction.clear()

789 return self

790

791 if not all(callable(fn) for fn in fns):

792 raise TypeError("parse actions must be callable")

793 self.parseAction[:] = [_trim_arity(fn) for fn in fns]

794 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry

795

796 return self

797

798 def add_parse_action(

799 self, *fns: ParseAction, call_during_try: bool = False, **kwargs: Any

800 ) -> ParserElement:

801 """

802 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.

803

804 See examples in :class:`copy`.

805 """

806 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

807

808 self.parseAction += [_trim_arity(fn) for fn in fns]

809 self.callDuringTry = self.callDuringTry or callDuringTry or call_during_try

810 return self

811

812 def add_condition(

813 self, *fns: ParseCondition, call_during_try: bool = False, **kwargs: Any

814 ) -> ParserElement:

815 """Add a boolean predicate function to expression's list of parse actions. See

816 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,

817 functions passed to ``add_condition`` need to return boolean success/fail of the condition.

818

819 Optional keyword arguments:

820

821 - ``message`` = define a custom message to be used in the raised exception

822 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise

823 ParseException

824 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,

825 default=False

826

827 Example:

828

829 .. doctest::

830 :options: +NORMALIZE_WHITESPACE

831

832 >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))

833 >>> year_int = integer.copy().add_condition(

834 ... lambda toks: toks[0] >= 2000,

835 ... message="Only support years 2000 and later")

836 >>> date_str = year_int + '/' + integer + '/' + integer

837

838 >>> result = date_str.parse_string("1999/12/31")

839 Traceback (most recent call last):

840 ParseException: Only support years 2000 and later...

841 """

842 callDuringTry: bool = deprecate_argument(kwargs, "callDuringTry", False)

843

844 for fn in fns:

845 self.parseAction.append(

846 condition_as_parse_action(

847 fn,

848 message=str(kwargs.get("message")),

849 fatal=bool(kwargs.get("fatal", False)),

850 )

851 )

852

853 self.callDuringTry = self.callDuringTry or call_during_try or callDuringTry

854 return self

855

    def set_fail_action(self, fn: ParseFailAction) -> ParserElement:
        """
        Define action to perform if parsing fails at this expression.
        Fail action fn is a callable function that takes the arguments
        ``fn(s, loc, expr, err)`` where:

        - ``s`` = string being parsed
        - ``loc`` = location where expression match was attempted and failed
        - ``expr`` = the parse expression that failed
        - ``err`` = the exception thrown

        The function returns no value.  It may throw :class:`ParseFatalException`
        if it is desired to stop parsing immediately.

        Any previously set fail action is replaced. Returns this expression,
        so that calls can be chained."""
        self.failAction = fn
        return self

871

    def _skipIgnorables(self, instring: str, loc: int) -> int:
        """
        Advance ``loc`` past every consecutive match of the ignore expressions.

        Each ignore expression is applied repeatedly until it fails with a
        ParseException, and the whole set is retried for as long as a pass
        both matched something and moved the location forward.

        :param instring: the string being parsed
        :param loc: the location to start skipping from
        :return: the location after all ignorable matches (``loc`` itself if
                 there are no ignore expressions)
        """
        if not self.ignoreExprs:
            return loc
        exprsFound = True
        ignore_expr_fns = [e._parse for e in self.ignoreExprs]
        last_loc = loc
        while exprsFound:
            exprsFound = False
            for ignore_fn in ignore_expr_fns:
                try:
                    # keep matching this expression until it raises
                    while 1:
                        loc, dummy = ignore_fn(instring, loc)
                        exprsFound = True
                except ParseException:
                    # this expression no longer matches here; try the next one
                    pass
            # check if all ignore exprs matched but didn't actually advance the parse location
            if loc == last_loc:
                break
            last_loc = loc
        return loc

892

893 def preParse(self, instring: str, loc: int) -> int:

894 if self.ignoreExprs:

895 loc = self._skipIgnorables(instring, loc)

896

897 if self.skipWhitespace:

898 instrlen = len(instring)

899 white_chars = self.whiteChars

900 while loc < instrlen and instring[loc] in white_chars:

901 loc += 1

902

903 return loc

904

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Base implementation: consume nothing and produce no tokens.

        :return: the unchanged ``loc`` paired with an empty token list
        """
        return loc, []

907

    def postParse(self, instring, loc, tokenlist):
        """
        Base implementation: return ``tokenlist`` unchanged.

        ``_parseNoCache`` passes the tokens returned by ``parseImpl`` through
        this method before packaging them as parse results.
        """
        return tokenlist

910

911 # @profile

def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Core (uncached) parse step for this expression.

    Runs, in order: optional ``preParse``, ``parseImpl``, ``postParse``,
    wrapping of the tokens in a :class:`ParseResults`, and then the parse
    actions (when ``do_actions`` is true or ``self.callDuringTry`` is set).
    When ``self.debug`` is set the ``debug_try`` / ``debug_fail`` /
    ``debug_match`` callbacks of ``self.debugActions`` are invoked around
    those steps, and ``self.failAction`` (if any) is called when matching fails.

    Parameters:

    - ``instring`` - the string being parsed
    - ``loc`` - location at which to start
    - ``do_actions`` - whether parse actions should be run
    - ``callPreParse`` - whether ``preParse`` may be called first (it is
      only called if ``self.callPreparse`` is also set)

    Returns a ``(next_loc, results)`` tuple.

    Raises :class:`ParseException` when ``parseImpl`` raises ``IndexError``
    on the guarded path, or when a parse action raises ``IndexError``; any
    other exception from ``parseImpl`` or a parse action propagates.
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        # Give the failure handlers below a defined location even if
        # preParse itself raises; without this they would hit an
        # UnboundLocalError that masks the real exception.
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                # parseImpl may index past the end of the string here;
                # report that as an ordinary parse failure
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # fast path: same steps without debug/fail callbacks
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, aslist=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns a new value replaces the
                    # current results; None or the same object keeps them
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            aslist=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        aslist=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

1012

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and return only the location following it.

    A :class:`ParseFatalException` is re-raised unchanged when
    ``raise_fatal`` is true; otherwise it is replaced by a plain
    :class:`ParseException` located at ``loc``.
    """
    try:
        next_loc = self._parse(instring, loc, do_actions=do_actions)[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise
    return next_loc

1027

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``.

    Returns ``False`` if the attempt raises :class:`ParseException` or
    ``IndexError``, ``True`` otherwise.
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True

1035

# cache for left-recursion in Forward references
# recursion_memos starts as a plain dict; enable_left_recursion() replaces it
# with a memo object and reset_cache() empties it.
# Keys are (int, Forward, bool) tuples and values are (int, results-or-exception)
# tuples, per the annotation below.
recursion_lock = RLock()
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}

1041

class _CacheType(typing.Protocol):
    """
    Class to be used for packrat and left-recursion caching of results
    and exceptions.

    Structural interface only: any object providing ``not_in_cache``,
    ``get``, ``set`` and ``clear`` satisfies it.
    """

    # sentinel compared by identity against the value returned from get()
    # to detect a cache miss
    not_in_cache: bool

    def get(self, *args) -> typing.Any: ...

    def set(self, *args) -> None: ...

    def clear(self) -> None: ...

1055

class NullCache(dict):
    """
    A null cache type for initialization of the packrat_cache class variable.
    If/when enable_packrat() is called, this null cache will be replaced by a
    proper _CacheType class instance.

    The ``get``, ``set`` and ``clear`` overrides below have empty bodies, so
    they store nothing and return ``None``.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any: ...

    def set(self, *args) -> None: ...

    def clear(self) -> None: ...

1070

# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
packrat_cache: _CacheType = NullCache()
# lock held while the packrat cache is consulted, reset or swapped
packrat_cache_lock = RLock()
# [hits, misses] counters; _parseCache indexes them as HIT=0, MISS=1
packrat_cache_stats = [0, 0]

1076

1077 # this method gets repeatedly called during backtracking with the same arguments -

1078 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Memoizing wrapper around ``_parseNoCache``.

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``.
    On a miss the expression is parsed and either the result, stored as
    ``(end_loc, results_copy, start_loc)``, or a traceback-free copy of the
    raised :class:`ParseBaseException` is cached. On a hit the cached
    exception is re-raised, or a copy of the cached results is returned.

    Debug callbacks are invoked on a hit with ``cache_hit=True``; a
    ``TypeError`` from a callback is ignored so that callbacks which do not
    accept that keyword do not break parsing.

    Returns a ``(next_loc, results)`` tuple.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # store a copy so later mutation of the returned results
                # cannot corrupt the cached entry
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                        )
                    except TypeError:
                        pass
                raise value

            value = cast(tuple[int, ParseResults, int], value)
            # cached layout is (end location, results, start location)
            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # pass start before end, matching the argument order
                    # used by _parseNoCache for debug_match
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass

            return end_loc, result

1126

# active parse entry point; defaults to the uncached implementation and is
# rebound by enable_packrat() / disable_memoization()
_parse = _parseNoCache

1128

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters, and empty the
    left-recursion memo table.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        stats = ParserElement.packrat_cache_stats
        # zero in place so existing references to the list observe the reset
        stats[:] = [0] * len(stats)
        ParserElement.recursion_memos.clear()

1140

# class attributes to keep caching status
# set by enable_packrat() / enable_left_recursion(), cleared by disable_memoization()
_packratEnabled = False
_left_recursion_enabled = False

1144

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing together with their memoization.

    Calling this when neither mode is active is harmless, so it can be used
    before enabling Packrat or Left Recursion to clear any earlier settings.
    """
    cls = ParserElement
    with cls.packrat_cache_lock:
        cls.reset_cache()
        cls._left_recursion_enabled = False
        cls._packratEnabled = False
        # route parsing back through the uncached implementation
        cls._parse = cls._parseNoCache

1159

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Enables "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, left-recursive :class:`Forward`
    elements are matched repeatedly with a fixed recursion depth that is
    increased step by step until the longest match is found.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    Recursion search naturally memoizes matches of ``Forward`` elements and
    may therefore skip re-evaluation of parse actions during backtracking.
    Programs whose parse actions rely on a strict ordering of side-effects
    may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing works similar but not identical to Packrat
    parsing, thus the two cannot be used together. Use ``force=True`` to
    disable any previous, conflicting settings.

    Raises ``RuntimeError`` if packrat parsing is enabled and ``force`` is
    not set, and ``NotImplementedError`` if ``cache_size_limit`` is an
    integer that is not positive.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True

1217

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (common in many
    complex grammars) can then return a cached value immediately instead of
    re-executing parsing/validating code. Both valid results and parsing
    exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    This speedup may break existing programs that use parse actions that
    have side-effects. For this reason, packrat parsing is disabled when
    you first import pyparsing. To activate the packrat feature, your
    program must call the class method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similar but not identical to Bounded Recursion parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.

    If packrat parsing is already enabled (and ``force`` is not set) the
    call returns without changing the existing cache.
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if ParserElement._packratEnabled:
            return

        ParserElement._packratEnabled = True
        ParserElement.packrat_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        # from now on every parse goes through the memoizing wrapper
        ParserElement._parse = ParserElement._parseCache

1269

def parse_string(
    self, instring: str, parse_all: bool = False, **kwargs
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This function is intended as the primary interface to the
    client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    To require that the input string matches the entire grammar, set the ``parse_all`` flag to ``True``. This
    is also equivalent to ending the grammar with :class:`StringEnd`\\ ().

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    # always consume the legacy keyword so it is handled even when the
    # PEP8 spelling was also given
    legacy_parse_all: bool = deprecate_argument(kwargs, "parseAll", False)
    parse_all = parse_all or legacy_parse_all

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, tokens = self._parse(instring, 0)
        if parse_all:
            # skip trailing ignorables/whitespace, then insist on end of input
            end_loc = self.preParse(instring, end_loc)
            end_of_input = Empty() + StringEnd().set_debug(False)
            end_of_input._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)

    return tokens

1345

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    **kwargs,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    If ``always_skip_whitespace`` is true (the default), leading text is
    skipped using a temporary :class:`Empty` that shares this expression's
    ignore expressions and whitespace characters; otherwise this
    expression's own ``preParse`` is used. If ``debug`` is true, each match
    is also printed as a dict with ``tokens``, ``start`` and ``end`` keys.
    The legacy ``maxMatches`` keyword is still accepted; the smaller of it
    and ``max_matches`` is used.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)

    max_matches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        # stand-in element used only for its preParse: it carries this
        # expression's ignorables and whitespace set
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < max_matches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character after the skipped prefix
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): `nextloc` (lowercase) is the pre-parsed
                        # position, but the assignment below uses `nextLoc`
                        # (the end of the match). When leading text was
                        # skipped this jumps past the whole match, which
                        # looks like it could drop overlapping matches --
                        # confirm intent before changing.
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc; step forward so the
                    # scan cannot stall
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1449

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text with the
    (possibly modified) tokens returned from a parse action. To use
    ``transform_string``, define a grammar and attach a parse action to it
    that modifies the returned token list. Calling ``transform_string()``
    on a target string scans for matches and replaces the matched text
    according to the logic in the parse action; the transformed string is
    returned.

    Note that this sets ``keepTabs`` on the expression.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
        made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
        Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    cursor = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy through the untouched text between the previous match and this one
            if start > cursor:
                pieces.append(instring[cursor:start])
            cursor = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces += toks.as_list()
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[cursor:])
        non_empty = [piece for piece in pieces if piece]
        return "".join([str(item) for item in _flatten(non_empty)])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1508

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Another extension to :class:`scan_string` that collects just the tokens
    of every match of this expression into a single :class:`ParseResults`.
    The optional ``max_matches`` argument stops the search after 'n'
    matches are found.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    legacy_limit: int = deprecate_argument(kwargs, "maxMatches", _MAX_INT)
    max_matches = min(legacy_limit, max_matches)

    try:
        found = self.scan_string(
            instring,
            max_matches=max_matches,
            always_skip_whitespace=False,
            debug=debug,
        )
        return ParseResults([toks for toks, _start, _end in found])
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1567

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    **kwargs,
) -> Generator[str, None, None]:
    """
    Generator method that splits a string, using this expression as the
    separator. The optional ``maxsplit`` argument limits the number of
    splits; the optional ``include_separators`` argument
    (default= ``False``) controls whether the matched separator text is
    also yielded.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    legacy_flag: bool = deprecate_argument(kwargs, "includeSeparators", False)
    include_separators = legacy_flag or include_separators

    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        # text between the previous separator and this one
        yield instring[cursor:start]
        if include_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator
    yield instring[cursor:]

1605

def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. Strings added to a
    :class:`ParserElement` are converted to :class:`Literal`\\ s by default.

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    # let Python try the reflected operation on the other operand
    return NotImplemented

1649

def __radd__(self, other) -> ParserElement:
    """
    Reflected ``+``: used when the left operand is not a :class:`ParserElement`.
    A leading ``...`` becomes a :class:`SkipTo` this expression, with the
    skipped text stored under the ``_skipped`` results name.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1662

def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator: like ``+``, but an
    :class:`And` error stop is inserted between the two operands.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    return NotImplemented

1672

def __rsub__(self, other) -> ParserElement:
    """
    Reflected ``-``: used when the left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1682

def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` is accepted wherever ``None`` is, and a bare ``...`` multiplier
    means ``(0, None)``.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Returns ``NotImplemented`` for unsupported multipliers and raises
    ``ValueError`` for a negative count or a tuple whose second value is
    smaller than its first.
    """
    # normalize Ellipsis forms into (min, max-or-None) tuples
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if not isinstance(other, (int, tuple)):
        return NotImplemented

    if isinstance(other, int):
        minElements, optElements = other, 0
    else:
        other = tuple(o if o is not Ellipsis else None for o in other)
        # pad short tuples with None and keep only the first two entries
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            # open-ended repetition
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            return NotImplemented

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if minElements == optElements == 0:
        return And([])

    if optElements:

        def makeOptionalList(n):
            # builds nested optionals: Opt(self + Opt(self + ...)) n levels deep
            if n > 1:
                return Opt(self + makeOptionalList(n - 1))
            else:
                return Opt(self)

        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret

1762

def __rmul__(self, other) -> ParserElement:
    """
    Reflected ``*``: delegates to :meth:`__mul__`, so ``3 * expr`` gives the
    same result as ``expr * 3``.
    """
    return self.__mul__(other)

1765

def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    ``expr | ...`` returns a pending skip that must skip.

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    alternative = other
    if isinstance(other, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if other == "":
            return Opt(self)
        alternative = self._literalStringClass(other)

    if isinstance(alternative, ParserElement):
        return MatchFirst([self, alternative])
    return NotImplemented

1784

def __ror__(self, other) -> ParserElement:
    """
    Reflected ``|``: used when the left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1794

def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`. A string
    operand is first converted with the literal string class.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1804

def __rxor__(self, other) -> ParserElement:
    """
    Reflected ``^``: used when the left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1814

def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`. A string
    operand is first converted with the literal string class.
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1824

def __rand__(self, other) -> ParserElement:
    """
    Reflected ``&``: used when the left operand is not a :class:`ParserElement`.
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1834

def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny` wrapping
    this expression.
    """
    return NotAny(self)

1840

# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence
# (setting it to None makes iter(expr) raise TypeError instead of falling back
# to calling __getitem__ with 0, 1, 2, ...)
__iter__ = None

1844

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``\\ s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    Raises ``TypeError`` if more than two index arguments are given.

    .. versionchanged:: 3.1.0
       Support for slice notation.
    """

    stop_on_defined = False
    stop_on = NoMatch()
    if isinstance(key, slice):
        # expr[count : stop_expr]
        key, stop_on = key.start, key.stop
        if key is None:
            key = ...
        stop_on_defined = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max : stop_expr] -- the slice carries max and the stop expression
        key, stop_on = (key[0], key[1].start), key[1].stop
        stop_on_defined = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        # a single non-iterable key n means exactly n repetitions: (n, n)
        key = (key, key)

    if len(key) > 2:
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})"
        )

    # clip to 2 elements
    ret = self * tuple(key[:2])
    ret = typing.cast(_MultipleMatch, ret)

    if stop_on_defined:
        ret.stopOn(stop_on)

    return ret

1906

def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted (or ``None``), this is the same as calling :class:`copy`.

    Example:

    .. testcode::

       # these are equivalent
       userdata = (
           Word(alphas).set_results_name("name")
           + Word(nums + "-").set_results_name("socsecno")
       )

       userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()

    return self._setResultsName(name)

1932

def suppress(self) -> ParserElement:
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, which
    suppresses its output; useful to keep punctuation from cluttering up
    returned output.
    """
    suppressed = Suppress(self)
    return suppressed

1939

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Enables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping
       in child elements (if any). This implementation does not read the
       argument; it only sets the flag on this element.
    :return: this element, so calls can be chained
    """
    self.skipWhitespace = True
    return self

1949

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Disables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping
       in child elements (if any). This implementation does not read the
       argument; it only clears the flag on this element.
    :return: this element, so calls can be chained
    """
    self.skipWhitespace = False
    return self

1960

def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars for this element.

    :param chars: the characters to treat as whitespace; stored as a ``set``
    :param copy_defaults: value stored in ``copyDefaultWhiteChars``
    :return: this element, so calls can be chained

    Whitespace skipping is also switched on.
    """
    self.whiteChars = set(chars)
    self.copyDefaultWhiteChars = copy_defaults
    self.skipWhitespace = True
    return self

1971

def parse_with_tabs(self) -> ParserElement:
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before
    parsing the input string, by setting ``keepTabs`` on this element.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    :return: this element, so calls can be chained
    """
    self.keepTabs = True
    return self

1980

def ignore(self, other: ParserElement) -> ParserElement:
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in :class:`Suppress`. A :class:`Suppress`
    expression is added only if it is not already in the ignore list; any
    other expression is copied, wrapped in :class:`Suppress`, and appended.

    Example:

    .. doctest::

       >>> patt = Word(alphas)[...]
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj']

       >>> patt = Word(alphas)[...].ignore(c_style_comment)
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj', 'lskjd']
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self

2008

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching.
    Any action passed as a falsy value (such as ``None``) is replaced by the
    corresponding default debug action.

    :param start_action: method to be called when an expression is about to be parsed;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
       should have the signature::

          fn(input_string: str,
             start_location: int,
             end_location: int,
             expression: ParserElement,
             parsed_tokens: ParseResults,
             cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             exception: Exception,
             cache_hit: bool)

    :return: this element, so calls can be chained
    """
    make_actions = self.DebugActions
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]

    self.debugActions = make_actions(on_start, on_success, on_exception)
    self.debug = any(self.debugActions)
    return self

2052

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions.

    Example:

    .. testcode::

       wd = Word(alphas).set_name("alphaword")
       integer = Word(nums).set_name("numword")
       term = wd | integer

       # turn on debugging for wd
       wd.set_debug()

       term[1, ...].parse_string("abc 123 xyz 890")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE

       Match alphaword at loc 0(1,1)
         abc 123 xyz 890
         ^
       Matched alphaword -> ['abc']
       Match alphaword at loc 4(1,5)
         abc 123 xyz 890
             ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
       Match alphaword at loc 8(1,9)
         abc 123 xyz 890
                 ^
       Matched alphaword -> ['xyz']
       Match alphaword at loc 12(1,13)
         abc 123 xyz 890
                     ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...
         abc 123 xyz 890
                        ^
       Match alphaword failed, ParseException raised: Expected alphaword, found end of text ...

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :meth:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :meth:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # apply the flag to each visited expression individually, without
        # re-triggering the recursive walk from each of them
        for expr in self.visit_all():
            expr.set_debug(flag, recurse=False)
        return self

    if not flag:
        self.debug = False
        return self

    # enabling debug installs the default actions (which also sets self.debug)
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

2122

@property
def default_name(self) -> str:
    """
    The auto-generated name of this expression.

    The name is produced by ``_generateDefaultName()`` the first time it is
    needed and cached in ``_defaultName``; it is regenerated only after the
    cache has been reset to ``None``.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

2128

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Return the auto-generated name for this expression.

    Child classes must define this method; its result is what the
    ``default_name`` property caches and returns.
    """

2134

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Define name for this expression, makes debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    The expression's ``errmsg`` is rebuilt from the resulting name.

    Example:

    .. doctest::

       >>> integer = Word(nums)
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

       >>> integer.set_name("integer")
       integer
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # str(self) resolves to the new custom name, or to the default name if cleared
    self.errmsg = f"Expected {str(self)}"

    if __diag__.enable_debug_on_named_expressions:
        has_custom_name = name is not None
        self.set_debug(has_custom_name)

    return self

2169

@property
def name(self) -> str:
    """
    Returns a user-defined name if available, but otherwise defaults back to the auto-generated name
    """
    custom = self.customName
    if custom is not None:
        return custom
    return self.default_name

@name.setter
def name(self, new_name) -> None:
    # assigning to ``name`` goes through set_name()
    self.set_name(new_name)

2180

def __str__(self) -> str:
    """Return this expression's ``name`` (custom if set, else auto-generated)."""
    return self.name

2183

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)

2186

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and reset the cached default name,
    so that ``default_name`` regenerates it on next access.

    :return: this element, so calls can be chained
    """
    self._defaultName = None
    self.streamlined = True
    return self

2191

def recurse(self) -> list[ParserElement]:
    """
    Return the sub-expressions of this element; this implementation has
    none and returns a new empty list on every call.
    """
    return list()

2194

def _checkRecursion(self, parseElementList):
    """
    Call ``_checkRecursion`` on every expression returned by ``recurse()``,
    passing a copy of ``parseElementList`` extended with this element.
    The caller's list is not modified.
    """
    visited = parseElementList[:] + [self]
    for child in self.recurse():
        child._checkRecursion(visited)

2199

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Always emits a :class:`PyparsingDeprecationWarning` before running the
    recursion check. ``validateTrace`` is accepted but not used here.
    """
    deprecation_message = "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    warnings.warn(
        deprecation_message,
        PyparsingDeprecationWarning,
        stacklevel=2,
    )
    self._checkRecursion([])

2214

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    **kwargs,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    :param file_or_filename: an object with a ``read()`` method, or a path to open
    :param encoding: text encoding used when opening a path (default ``"utf-8"``)
    :param parse_all: flag passed on to :meth:`parse_string`; the legacy
       ``parseAll`` keyword is also accepted, and either one enables it
    """
    parseAll: bool = deprecate_argument(kwargs, "parseAll", False)
    parse_all = parse_all or parseAll

    # EAFP: try the argument as a file object first, and treat it as a
    # path only if it has no read() attribute
    try:
        file_or_filename = typing.cast(TextIO, file_or_filename)
        file_contents = file_or_filename.read()
    except AttributeError:
        file_or_filename = typing.cast(str, file_or_filename)
        with open(file_or_filename, "r", encoding=encoding) as f:
            file_contents = f.read()

    try:
        return self.parse_string(file_contents, parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

2245

def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object always compares equal
    - a string compares equal if this expression matches it in full
      (``matches(other, parse_all=True)``)
    - another :class:`ParserElement` compares equal if both have equal
      instance attributes (``vars()``)
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2254

def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)

2257

def matches(self, test_string: str, parse_all: bool = True, **kwargs) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match
    :param parse_all: flag to pass to :meth:`parse_string` when running tests;
       the legacy ``parseAll`` keyword is also accepted, and passing ``False``
       for either one disables it
    :return: ``True`` if parsing succeeds, ``False`` if a
       :class:`ParseBaseException` is raised

    Example:

    .. doctest::

       >>> expr = Word(nums)
       >>> expr.matches("100")
       True
    """
    parseAll: bool = deprecate_argument(kwargs, "parseAll", True)
    parse_all = parse_all and parseAll

    try:
        self.parse_string(str(test_string), parse_all=parse_all)
    except ParseBaseException:
        return False
    else:
        return True

2282

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The keyword-only ``parseAll``, ``fullDump``, ``printResults``, ``failureTests`` and
    ``postParse`` arguments are the legacy camelCase spellings; each is merged with its
    snake_case counterpart before use.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` pairs, in which ``result`` is either the parsed
    :class:`ParseResults` or the exception raised for that test

    Passing example:

    .. testcode::

       number_expr = pyparsing_common.number.copy()

       result = number_expr.run_tests('''
           # unsigned integer
           100
           # negative integer
           -100
           # float with scientific notation
           6.02e23
           # integer with scientific notation
           1e-12
           # negative decimal number without leading digit
           -.100
           ''')
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # unsigned integer
       100
       [100]

       # negative integer
       -100
       [-100]

       # float with scientific notation
       6.02e23
       [6.02e+23]

       # integer with scientific notation
       1e-12
       [1e-12]

       # negative decimal number without leading digit
       -.100
       [-0.1]
       Success

    Failure-test example:

    .. testcode::

       result = number_expr.run_tests('''
           # stray character
           100Z
           # too many '.'
           3.14.159
           ''', failure_tests=True)
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # stray character
       100Z
       100Z
          ^
       ParseException: Expected end of text, found 'Z' ...

       # too many '.'
       3.14.159
       3.14.159
           ^
       ParseException: Expected end of text, found '.' ...
       FAIL: Expected end of text, found '.' ...
       Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

       expr = Word(alphanums)[1,...]
       expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE
       :hide:


       this is a test\\n of strings that spans \\n 3 lines
       ['this', 'is', 'a', 'test', 'of', 'strings', 'that', 'spans', '3', 'lines']

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # merge the legacy camelCase keyword arguments with their snake_case forms
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse
    if isinstance(tests, str_type):
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    # matches a literal backslash-n marker in a test line (outside quoted strings)
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # comment lines, and blank lines that follow pending comments, are
        # collected and emitted ahead of the next real test
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            # use the merged flag, so that the legacy parseAll=False is honored
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    next_f = next(it)[0]
                    tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    out.append(
                        f"{postParse.__name__} failed: {type(e).__name__}: {e}"
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2524

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str, Path, or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    railroad = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: create (or overwrite) the file and write the HTML to it
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs))
    else:
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(railroad, embed=embed, **kwargs))

2586

# Compatibility synonyms
#
# Each legacy camelCase name below is bound to the result of calling
# replaced_by_pep8() with the legacy name and its snake_case counterpart.
# NOTE(review): replaced_by_pep8 is imported from .util and presumably returns
# a wrapper that forwards to the snake_case function - confirm against .util.
# fmt: off

# class-level (static) entry points
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# instance methods
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
# plain alias: the same property object under its legacy name
defaultName = default_name
# fmt: on

2621

2622

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # render as "<anchor> ..." by naming the sum of the anchor and an
        # Empty, then substituting the ellipsis for the "Empty" text
        return str(self.anchor + Empty()).replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # Adding the next expression resolves the placeholder: build the
        # SkipTo that skips ahead to ``other`` and records the skipped text
        # under the "_skipped" results name.
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched: drop the skipped entry when nothing was skipped
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match: report it as missing when the skip was empty
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        with_anchor = self.anchor + skipper().add_parse_action(must_skip)
        without_anchor = skipper().add_parse_action(show_skip)
        return (with_anchor | without_anchor) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # a placeholder that was never resolved by ``+`` cannot be parsed
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2662

2663

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.

    The default name of a token is the name of its class.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        token_class = type(self)
        return token_class.__name__

2674

2675

class NoMatch(Token):
    """
    A token that will never match; parsing it always raises
    :class:`ParseException` with the message ``"Unmatchable token"``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2689

2690

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example:

    .. doctest::

       >>> Literal('abc').parse_string('abc')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('abcdef')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('ab')
       Traceback (most recent call last):
       ParseException: Expected 'abc', found 'ab' (at char 0), (line:1, col:1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", **kwargs):
        # Subclasses are always created as themselves.
        if cls is not Literal:
            return super().__new__(cls)

        # Performance tuning: select a subclass with optimized parseImpl
        matchString: str = deprecate_argument(kwargs, "matchString", "")
        match_string = matchString or match_string
        if not match_string:
            return super().__new__(Empty)
        if len(match_string) == 1:
            return super().__new__(_SingleCharLiteral)

        # Default behavior
        return super().__new__(cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", **kwargs) -> None:
        matchString: str = deprecate_argument(kwargs, "matchString", "")

        super().__init__()
        # the legacy matchString keyword wins over match_string when non-empty
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        # self.name is derived from self.match, so this must follow the assignment above
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # compare the first character before running the full startswith() test
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2752

2753

class Empty(Literal):
    """
    An empty token, will always match.

    Matching consumes no input and produces no tokens. The constructor
    arguments are accepted but ignored; the match string is always ``""``.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        super().__init__("")
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeed at the current location with an empty token list
        return loc, []

2769

2770

class _SingleCharLiteral(Literal):
    # Literal specialization selected by Literal.__new__ for match strings of
    # length 1: a single character comparison replaces the startswith() test.
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2776

2777

# Register Literal as the class used to wrap plain strings that are combined
# with parser elements via operators (the operator methods call
# self._literalStringClass(other) when ``other`` is a string). The assignment
# is made here because Literal is defined after ParserElement.
ParserElement._literalStringClass = Literal

2779

2780

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately preceded and followed by whitespace or
    non-keyword characters. Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``ident_chars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("start")
       ParseResults(['start'], {})
       >>> Keyword("start").parse_string("starting")
       Traceback (most recent call last):
       ParseException: Expected Keyword 'start', keyword was immediately
       followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> Keyword("start").parse_string("starting").debug()
       Traceback (most recent call last):
       ParseException: Expected Keyword "start", keyword was immediately
       followed by keyword character, found 'ing' ...

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        **kwargs,
    ) -> None:
        """
        :param match_string: the keyword text; must not be empty
        :param ident_chars: characters treated as keyword characters when
            checking the boundaries of a match; defaults to
            ``Keyword.DEFAULT_KEYWORD_CHARS``
        :param caseless: if true, compare upper-cased input against the
            upper-cased keyword
        :raises ValueError: if the keyword string is empty
        """
        # Pre-PEP8 spellings are still accepted through **kwargs.
        matchString = deprecate_argument(kwargs, "matchString", "")
        identChars = deprecate_argument(kwargs, "identChars", None)

        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        self.firstMatchChar = match_string[:1]
        if not self.firstMatchChar:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = f"Expected {type(self).__name__} {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparison is done in upper case, so both the keyword
            # and the boundary characters are stored upper-cased.
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.ident_chars = set(identChars)

    @property
    def identChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
           use ident_chars instead.

        Property returning the characters being used as keyword characters for this expression.
        """
        return self.ident_chars

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the keyword at ``loc``, requiring that the characters immediately
        before and after the match are not keyword characters.

        Returns ``(new_loc, keyword)`` on success. Raises ParseException
        otherwise; when the text matched but a boundary check failed, the
        message says which side failed and the error location points at the
        offending character.
        """
        errmsg = self.errmsg or ""
        errloc = loc
        match_len = self.matchLen
        ident_chars = self.ident_chars
        match_end = loc + match_len

        if self.caseless:
            if instring[loc:match_end].upper() == self.caselessmatch:
                if loc == 0 or instring[loc - 1].upper() not in ident_chars:
                    if (
                        loc >= len(instring) - match_len
                        or instring[match_end].upper() not in ident_chars
                    ):
                        return match_end, self.match

                    # followed by keyword char
                    errmsg += ", keyword was immediately followed by keyword character"
                    errloc = match_end
                else:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
            # else no match just raise plain exception

        elif (
            instring[loc] == self.firstMatchChar
            and match_len == 1
            or instring.startswith(self.match, loc)
        ):
            if loc == 0 or instring[loc - 1] not in ident_chars:
                if (
                    loc >= len(instring) - match_len
                    or instring[match_end] not in ident_chars
                ):
                    return match_end, self.match

                # followed by keyword char
                errmsg += ", keyword was immediately followed by keyword character"
                errloc = match_end
            else:
                # preceded by keyword char
                errmsg += ", keyword was immediately preceded by keyword character"
                errloc = loc - 1
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    # Compatibility synonyms
    setDefaultKeywordChars = staticmethod(
        replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
    )

2922

2923

class CaselessLiteral(Literal):
    """
    Token to match a specified string without regard to letter case.
    Note: the token returned is always the match string as it was given
    to the constructor, NOT the text as it appeared in the input.

    Example:

    .. doctest::

       >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", **kwargs) -> None:
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        literal = legacy_match or match_string

        # The parent stores the upper-cased form used for comparison.
        super().__init__(literal.upper())
        # Keep the literal exactly as defined, to hand back on a match.
        self.returnString = literal
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString

2953

2954

class CaselessKeyword(Keyword):
    """
    Case-insensitive form of :class:`Keyword`.

    Example:

    .. doctest::

       >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self, match_string: str = "", ident_chars: typing.Optional[str] = None, **kwargs
    ) -> None:
        # Deprecated camelCase spellings take precedence when supplied.
        legacy_match: str = deprecate_argument(kwargs, "matchString", "")
        legacy_ident: typing.Optional[str] = deprecate_argument(
            kwargs, "identChars", None
        )

        super().__init__(
            legacy_match or match_string,
            legacy_ident or ident_chars,
            caseless=True,
        )

2980

2981

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> patt = CloseMatch("ATCATCGAATGGA")
       >>> patt.parse_string("ATCATCGAAXGGA")
       ParseResults(['ATCATCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

       >>> patt.parse_string("ATCAXCGAAXGGA")
       Traceback (most recent call last):
       ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
       found 'ATCAXCGAAXGGA' (at char 0), (line:1, col:1)

       # exact match
       >>> patt.parse_string("ATCATCGAATGGA")
       ParseResults(['ATCATCGAATGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': []})

       # close match allowing up to 2 mismatches
       >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
       >>> patt.parse_string("ATCAXCGAAXGGA")
       ParseResults(['ATCAXCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        caseless=False,
        **kwargs,
    ) -> None:
        # The deprecated camelCase argument supplies the fallback (default 1)
        # whenever max_mismatches is not given explicitly.
        legacy_limit: int = deprecate_argument(kwargs, "maxMismatches", 1)
        if max_mismatches is None:
            limit = legacy_limit
        else:
            limit = max_mismatches

        super().__init__()
        self.match_string = match_string
        self.maxMismatches = limit
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the input at ``loc`` with the match string position by
        position, recording the index of every differing character.

        Raises ParseException if the input is too short or the number of
        mismatches exceeds the configured maximum.
        """
        start = loc
        target = self.match_string
        end = start + len(target)

        # Not enough input left to hold the whole match string.
        if end > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        limit = self.maxMismatches
        fold_case = self.caseless
        mismatches = []
        pos = 0

        for pos, (src, mat) in enumerate(zip(instring[start:end], target)):
            if fold_case:
                src, mat = src.lower(), mat.lower()

            if src != mat:
                mismatches.append(pos)
                if len(mismatches) > limit:
                    raise ParseException(instring, loc, self.errmsg, self)

        # The new location is one past the last position compared.
        loc = start + pos + 1
        results = ParseResults([instring[start:loc]])
        results["original"] = target
        results["mismatches"] = mismatches
        return loc, results

3082

3083

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

       # a word composed of digits
       integer = Word(nums)
       # Two equivalent alternate forms:
       Word("0123456789")
       Word(srange("[0-9]"))

       # a word with a leading capital, and zero or more lowercase
       capitalized_word = Word(alphas.upper(), alphas.lower())

       # hostnames are alphanumeric, with leading alpha, and '-'
       hostname = Word(alphas, alphanums + '-')

       # roman numeral
       # (not a strict parser, accepts invalid mix of characters)
       roman = Word("IVXLCDM")

       # any string of non-whitespace characters, except for ','
       csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
                        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
       Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        # Pre-PEP8 spellings are still accepted through **kwargs.
        initChars: typing.Optional[str] = deprecate_argument(kwargs, "initChars", None)
        bodyChars: typing.Optional[str] = deprecate_argument(kwargs, "bodyChars", None)
        asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        excludeChars: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.init_chars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # No separate body set: body characters are the initial characters.
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        # Recorded before `exact` is applied, so `exact` alone does not count
        # as an explicitly specified maximum.
        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # Read the stored sets directly: the deprecated `initChars` property
        # builds a new set on every access.
        leading_chars = self.init_chars
        trailing_chars = self.bodyChars

        # see if we can make a regex for this Word
        if " " not in (leading_chars | trailing_chars):
            if len(leading_chars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(leading_chars)}]"

            if trailing_chars == leading_chars:
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(trailing_chars)}]"
                    # The leading fragment accounts for one character, so the
                    # body repeat counts below are one less than min/max.
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # Fall back to the character-by-character parseImpl.
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    @property
    def initChars(self) -> set[str]:
        """
        .. deprecated:: 3.3.0
           use `init_chars` instead.

        Property returning the initial chars to be used when matching this
        Word expression. If no body chars were specified, the initial characters
        will also be the body characters.
        """
        return set(self.init_chars)

    def copy(self) -> Word:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            # Re-bind the regex fast path to the copy.
            ret.re_match = self.re_match
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        def charsAsStr(s):
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        init_chars = self.init_chars
        if init_chars != self.bodyChars:
            base = f"W:({charsAsStr(init_chars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(init_chars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single-character word: drop the leading "W:"
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Character-by-character matcher, used when no regex fast path was
        installed by ``__init__``.

        Returns ``(new_loc, matched_text)`` or raises ParseException.
        """
        # Use the stored set; the `initChars` property would copy it on
        # every parse attempt.
        if instring[loc] not in self.init_chars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            # an explicit max was given and the word continues past it
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Regex-based matcher installed in place of parseImpl when the regex compiles."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result[0]

3372

3373

class Char(Word):
    """Convenience class equivalent to :class:`Word` ``(characters, exact=1)``,
    for matching exactly one character drawn from a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        **kwargs,
    ) -> None:
        # Deprecated camelCase spellings take precedence when supplied.
        legacy_keyword: bool = deprecate_argument(kwargs, "asKeyword", False)
        legacy_exclude: typing.Optional[str] = deprecate_argument(
            kwargs, "excludeChars", None
        )

        super().__init__(
            charset,
            exact=1,
            as_keyword=legacy_keyword or as_keyword,
            exclude_chars=legacy_exclude or exclude_chars,
        )

3397

3398

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ documentation for an
    explanation of the acceptable patterns and flags.

    Example:

    .. testcode::

       realnum = Regex(r"[+-]?\d+\.\d*")
       # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
       roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

       # named fields in a regex will be returned as named results
       date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

       # the Regex class will accept regular expressions compiled using the
       # re module
       import re
       parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        **kwargs,
    ) -> None:
        """
        :param pattern: a non-empty pattern string, an already compiled
            pattern object (anything with ``pattern`` and ``match``
            attributes), or a no-argument callable returning either of those
        :param flags: flags passed to ``re.compile()`` for string patterns
        :param as_group_list: return the match's ``groups()`` as the result
        :param as_match: return the match object itself as the result
        :raises ValueError: if ``pattern`` is an empty string
        :raises TypeError: if ``pattern`` is none of the accepted kinds
        """
        super().__init__()
        asGroupList: bool = deprecate_argument(kwargs, "asGroupList", False)
        asMatch: bool = deprecate_argument(kwargs, "asMatch", False)

        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the `re` property is first used
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # as_match takes precedence when both options are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self) -> Regex:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        ret: Regex = cast(Regex, super().copy())
        # re-bind the selected parseImpl variant to the copy
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch  # type: ignore [method-assign]
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """
        Property returning the compiled regular expression for this Regex.

        Generally only used internally by pyparsing.

        :raises ValueError: if the pattern string cannot be compiled
        """
        if self._re:
            return self._re

        if callable(self.pattern):
            # replace self.pattern with the string returned by calling self.pattern()
            self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, we're done
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
                return self._re

            # the callable produced a pattern string; record it like a
            # string passed directly to the constructor
            self.reString = self.pattern

        try:
            self._re = re.compile(self.pattern, self.flags)
        except re.error:
            raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")
        else:
            # use the freshly compiled object directly rather than re-entering
            # this property while it is still being computed
            self._may_return_empty = self._re.match("") is not None
            return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """
        Whether this expression can match the empty string; determined on
        first access by matching the compiled pattern against ``""``.
        """
        if self._may_return_empty is None:
            # compiling a string pattern sets the flag as a side effect
            compiled = self.re
            if self._may_return_empty is None:
                # a pre-compiled pattern (given directly or returned by a
                # callable) skips that step, so test it here
                self._may_return_empty = compiled.match("") is not None
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the pattern at ``loc`` and return the matched text as a
        ParseResults, with any named groups added as named results.
        """
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result[0])
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """Variant of parseImpl that returns the match's ``groups()`` tuple."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """Variant of parseImpl that returns the match object itself."""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example:

        .. testcode::

           make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
           print(make_html.transform_string("h1:main title:"))

        .. testoutput::

           <h1>main title</h1>

        :raises TypeError: if this Regex was built with ``as_group_list=True``,
            or with ``as_match=True`` and ``repl`` is a callable
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            def pa(tokens):
                # tokens[0] is the match object
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                # tokens[0] is the matched text
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3622

3623

3624class QuotedString(Token):

3625 r"""

3626 Token for matching strings that are delimited by quoting characters.

3627

3628 Defined with the following parameters:

3629

3630 - ``quote_char`` - string of one or more characters defining the

3631 quote delimiting string

3632 - ``esc_char`` - character to re_escape quotes, typically backslash

3633 (default= ``None``)

3634 - ``esc_quote`` - special quote sequence to re_escape an embedded quote

3635 string (such as SQL's ``""`` to re_escape an embedded ``"``)

3636 (default= ``None``)

3637 - ``multiline`` - boolean indicating whether quotes can span

3638 multiple lines (default= ``False``)

3639 - ``unquote_results`` - boolean indicating whether the matched text

3640 should be unquoted (default= ``True``)

3641 - ``end_quote_char`` - string of one or more characters defining the

3642 end of the quote delimited string (default= ``None`` => same as

3643 quote_char)

3644 - ``convert_whitespace_escapes`` - convert escaped whitespace

3645 (``'\t'``, ``'\n'``, etc.) to actual whitespace

3646 (default= ``True``)

3647

3648 .. caution:: ``convert_whitespace_escapes`` has no effect if

3649 ``unquote_results`` is ``False``.

3650

3651 Example:

3652

3653 .. doctest::

3654

3655 >>> qs = QuotedString('"')

3656 >>> print(qs.search_string('lsjdf "This is the quote" sldjf'))

3657 [['This is the quote']]

3658 >>> complex_qs = QuotedString('{{', end_quote_char='}}')

3659 >>> print(complex_qs.search_string(

3660 ... 'lsjdf {{This is the "quote"}} sldjf'))

3661 [['This is the "quote"']]

3662 >>> sql_qs = QuotedString('"', esc_quote='""')

3663 >>> print(sql_qs.search_string(

3664 ... 'lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

3665 [['This is the quote with "embedded" quotes']]

3666 """

3667

3668 ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

3669

3670 def __init__(

3671 self,

3672 quote_char: str = "",

3673 esc_char: typing.Optional[str] = None,

3674 esc_quote: typing.Optional[str] = None,

3675 multiline: bool = False,

3676 unquote_results: bool = True,

3677 end_quote_char: typing.Optional[str] = None,

3678 convert_whitespace_escapes: bool = True,

3679 **kwargs,

3680 ) -> None:

3681 super().__init__()

3682 quoteChar: str = deprecate_argument(kwargs, "quoteChar", "")

3683 escChar: str = deprecate_argument(kwargs, "escChar", None)

3684 escQuote: str = deprecate_argument(kwargs, "escQuote", None)

3685 unquoteResults: bool = deprecate_argument(kwargs, "unquoteResults", True)

3686 endQuoteChar: typing.Optional[str] = deprecate_argument(

3687 kwargs, "endQuoteChar", None

3688 )

3689 convertWhitespaceEscapes: bool = deprecate_argument(

3690 kwargs, "convertWhitespaceEscapes", True

3691 )

3692

3693 esc_char = escChar or esc_char

3694 esc_quote = escQuote or esc_quote

3695 unquote_results = unquoteResults and unquote_results

3696 end_quote_char = endQuoteChar or end_quote_char

3697 convert_whitespace_escapes = (

3698 convertWhitespaceEscapes and convert_whitespace_escapes

3699 )

3700 quote_char = quoteChar or quote_char

3701

3702 # remove white space from quote chars

3703 quote_char = quote_char.strip()

3704 if not quote_char:

3705 raise ValueError("quote_char cannot be the empty string")

3706

3707 if end_quote_char is None:

3708 end_quote_char = quote_char

3709 else:

3710 end_quote_char = end_quote_char.strip()

3711 if not end_quote_char:

3712 raise ValueError("end_quote_char cannot be the empty string")

3713

3714 self.quote_char: str = quote_char

3715 self.quote_char_len: int = len(quote_char)

3716 self.first_quote_char: str = quote_char[0]

3717 self.end_quote_char: str = end_quote_char

3718 self.end_quote_char_len: int = len(end_quote_char)

3719 self.esc_char: str = esc_char or ""

3720 self.has_esc_char: bool = esc_char is not None

3721 self.esc_quote: str = esc_quote or ""

3722 self.unquote_results: bool = unquote_results

3723 self.convert_whitespace_escapes: bool = convert_whitespace_escapes

3724 self.multiline = multiline

3725 self.re_flags = re.RegexFlag(0)

3726

3727 # fmt: off

3728 # build up re pattern for the content between the quote delimiters

3729 inner_pattern: list[str] = []

3730

3731 if esc_quote:

3732 inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

3733

3734 if esc_char:

3735 inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

3736

3737 if len(self.end_quote_char) > 1:

3738 inner_pattern.append(

3739 "(?:"

3740 + "|".join(

3741 f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"

3742 for i in range(len(self.end_quote_char) - 1, 0, -1)

3743 )

3744 + ")"

3745 )

3746

3747 if self.multiline:

3748 self.re_flags |= re.MULTILINE | re.DOTALL

3749 inner_pattern.append(

3750 rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"

3751 rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"

3752 )

3753 else:

3754 inner_pattern.append(

3755 rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"

3756 rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"

3757 )

3758

3759 self.pattern = "".join(

3760 [

3761 re.escape(self.quote_char),

3762 "(?:",

3763 '|'.join(inner_pattern),

3764 ")*",

3765 re.escape(self.end_quote_char),

3766 ]

3767 )

3768

3769 if self.unquote_results:

3770 if self.convert_whitespace_escapes:

3771 self.unquote_scan_re = re.compile(

3772 rf"({'|'.join(re.escape(k) for k in self.ws_map)})"

3773 rf"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"

3774 rf"|({re.escape(self.esc_char)}.)"

3775 rf"|(\n|.)",

3776 flags=self.re_flags,

3777 )

3778 else:

3779 self.unquote_scan_re = re.compile(

3780 rf"({re.escape(self.esc_char)}.)"

3781 rf"|(\n|.)",

3782 flags=self.re_flags

3783 )

3784 # fmt: on

3785

3786 try:

3787 self.re = re.compile(self.pattern, self.re_flags)

3788 self.reString = self.pattern

3789 self.re_match = self.re.match

3790 except re.error:

3791 raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

3792

3793 self.errmsg = f"Expected {self.name}"

3794 self.mayIndexError = False

3795 self._may_return_empty = True

3796

3797 def _generateDefaultName(self) -> str:

3798 if self.quote_char == self.end_quote_char and isinstance(

3799 self.quote_char, str_type

3800 ):

3801 return f"string enclosed in {self.quote_char!r}"

3802

3803 return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

3804

3805 def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:

3806 # check first character of opening quote to see if that is a match

3807 # before doing the more complicated regex match

3808 result = (

3809 instring[loc] == self.first_quote_char

3810 and self.re_match(instring, loc)

3811 or None

3812 )

3813 if not result:

3814 raise ParseException(instring, loc, self.errmsg, self)

3815

3816 # get ending loc and matched string from regex matching result

3817 loc = result.end()

3818 ret = result[0]

3819

3820 if self.unquote_results:

3821 # strip off quotes

3822 ret = ret[self.quote_char_len : -self.end_quote_char_len]

3823

3824 if isinstance(ret, str_type):

3825 # fmt: off

3826 if self.convert_whitespace_escapes:

3827 # as we iterate over matches in the input string,

3828 # collect from whichever match group of the unquote_scan_re

3829 # regex matches (only 1 group will match at any given time)

3830 ret = "".join(

3831 # match group 1 matches \t, \n, etc.

3832 self.ws_map[g] if (g := match[1])

3833 # match group 2 matches escaped octal, null, hex, and Unicode

3834 # sequences

3835 else _convert_escaped_numerics_to_char(g[1:]) if (g := match[2])

3836 # match group 3 matches escaped characters

3837 else g[-1] if (g := match[3])

3838 # match group 4 matches any character

3839 else match[4]

3840 for match in self.unquote_scan_re.finditer(ret)

3841 )

3842 else:

3843 ret = "".join(

3844 # match group 1 matches escaped characters

3845 g[-1] if (g := match[1])

3846 # match group 2 matches any character

3847 else match[2]

3848 for match in self.unquote_scan_re.finditer(ret)

3849 )

3850 # fmt: on

3851

3852 # replace escaped quotes

3853 if self.esc_quote:

3854 ret = ret.replace(self.esc_quote, self.end_quote_char)

3855

3856 return loc, ret

3857

3858

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.

    Whitespace is included in the match unless it is listed in the exclusion
    set (see example). Construct with a string of all disallowed characters,
    plus an optional minimum, maximum, and/or exact length. ``min`` defaults
    to 1 (values < 1 are rejected); ``max`` and ``exact`` default to 0,
    meaning no maximum or exact length restriction.

    Example:

    .. testcode::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(
            DelimitedList(csv_value).parse_string(
                "dkls,lsdkjf,s12 34,@!#,213"
            )
        )

    prints:

    .. testoutput::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self, not_chars: str = "", min: int = 1, max: int = 0, exact: int = 0, **kwargs
    ) -> None:
        super().__init__()
        # legacy camelCase spelling of not_chars
        notChars: str = deprecate_argument(kwargs, "notChars", "")

        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the collapsed form only decides whether the name is truncated
        collapsed = _collapse_string_to_ranges(self.notChars)
        shown = self.notChars
        if len(collapsed) > 16:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))

        # first character is already accepted; extend while allowed
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3941

3942

class White(Token):
    """Token that matches whitespace explicitly.

    Pyparsing grammars normally skip whitespace; use this class where a
    whitespace structure is significant. Construct with a string of the
    whitespace characters to match; the default is ``" \\t\\r\\n"``. The
    optional ``min``, ``max``, and ``exact`` arguments are as defined for
    the :class:`Word` class.
    """

    # display labels used to build the default name of a White expression
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00a0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180e": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200a": "<HAIR_SPACE>",
        "\u200b": "<ZERO_WIDTH_SPACE>",
        "\u202f": "<NNBSP>",
        "\u205f": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws

        # skippable whitespace is every known whitespace character that
        # this expression is not itself supposed to match
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)

        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

    def _generateDefaultName(self) -> str:
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        return "".join(labels)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))

        # first character is already accepted; extend while it keeps matching
        loc += 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

4020

4021

class PositionToken(Token):
    """Token base class initialized as possibly-empty and not index-error prone."""

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

4027

4028

class GoToColumn(PositionToken):
    """Token that advances to a specific column of the input text; useful
    for scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        # step over whitespace until the target column (or a non-space) is hit
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        # consume everything between here and the target column
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]

4061

4062

class LineStart(PositionToken):
    r"""Matches if the current position is at the logical beginning of a line
    (after skipping whitespace) within the parse string.

    Example:

    .. testcode::

        test = '''\
        AAA this line
        AAA and this line
        AAA and even this line
        B AAA but definitely not this line
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['AAA', ' this line']
        ['AAA', ' and this line']
        ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before newline is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression that skips whitespace other than newline
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        # if newline was originally skippable, step over each newline and
        # the non-newline whitespace that follows it
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4115

4116

class LineEnd(PositionToken):
    """Matches if the current position is at the end of a line within the
    parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        # newline must not be skipped as whitespace, since it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)

        # end of input counts as an end of line, with no token
        if loc == end:
            return loc + 1, []

        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"

        raise ParseException(instring, loc, self.errmsg, self)

4138

4139

class StringStart(PositionToken):
    """Matches if the current position is at the beginning of the parse
    string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # succeed at position 0, or when everything before loc is just
        # whitespace and ignorables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

4155

4156

class StringEnd(PositionToken):
    """
    Matches if the current position is at the end of the parse string.
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        # exactly at the end: advance one past it; already past: stay put
        new_loc = loc + 1 if loc == end else loc
        return new_loc, []

4175

4176

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # legacy camelCase spelling of word_chars
        legacy_chars: str = deprecate_argument(kwargs, "wordChars", printables)

        # the legacy value is used only when it differs from the default
        if legacy_chars == printables:
            chosen = word_chars
        else:
            chosen = legacy_chars

        super().__init__()
        self.wordChars = set(chosen)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if loc == 0:
            return loc, []

        chars = self.wordChars
        # fail if the previous character is a word character, or the
        # current character is not one
        if instring[loc - 1] in chars or instring[loc] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, []

4203

4204

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, **kwargs) -> None:
        # legacy camelCase spelling of word_chars
        legacy_chars: str = deprecate_argument(kwargs, "wordChars", printables)

        # the legacy value is used only when it differs from the default
        if legacy_chars == printables:
            chosen = word_chars
        else:
            chosen = legacy_chars

        super().__init__()
        self.wordChars = set(chosen)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        length = len(instring)

        # empty input, or a position at/after the end, always matches
        if length == 0 or loc >= length:
            return loc, []

        chars = self.wordChars
        # fail if the current character is a word character, or the
        # previous character is not one
        if instring[loc] in chars or instring[loc - 1] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, []

4232

4233

class Tag(Token):
    """
    A meta-element that inserts a named result into the parsed tokens, so
    that it can be checked later in a parse action or while processing the
    parsed results. Accepts an optional tag value, defaulting to `True`.

    Example:

    .. doctest::

        >>> end_punc = "." | ("!" + Tag("enthusiastic"))
        >>> greeting = "Hello," + Word(alphas) + end_punc

        >>> result = greeting.parse_string("Hello, World.")
        >>> print(result.dump())
        ['Hello,', 'World', '.']

        >>> result = greeting.parse_string("Hello, World!")
        >>> print(result.dump())
        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self._may_return_empty = True
        self.mayIndexError = False
        self.leave_whitespace()

        self.tag_name = tag_name
        self.tag_value = value

        # the tag is written into the results by a parse action
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}:{self.tag_name}={self.tag_value!r}"

4275

4276

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(savelist)
        self.exprs: list[ParserElement]

        # a generator can be consumed only once, so materialize it first
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # any strings in the sequence are wrapped as literal expressions
            self.exprs = [
                self._literalStringClass(e) if isinstance(e, str_type) else e
                for e in list(exprs)
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]

        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        return list(self.exprs)

    def append(self, other) -> ParserElement:
        """
        Add an expression to the list of expressions held by this
        ParseExpression instance.
        """
        self.exprs.append(other)
        # the cached default name no longer describes the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes
        ``leave_whitespace`` on copies of all contained expressions.
        """
        super().leave_whitespace(recursive)

        if not recursive:
            return self

        # work on copies of the contained expressions
        self.exprs = [e.copy() for e in self.exprs]
        for sub_expr in self.exprs:
            sub_expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes
        ``ignore_whitespace`` on copies of all contained expressions.
        """
        super().ignore_whitespace(recursive)

        if not recursive:
            return self

        # work on copies of the contained expressions
        self.exprs = [e.copy() for e in self.exprs]
        for sub_expr in self.exprs:
            sub_expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define an expression to be ignored (e.g., comments) while doing
        pattern matching; may be called repeatedly, to define multiple
        comment or other ignorable patterns.
        """
        # a Suppress that is already registered is not added a second time
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        # pass the newly registered ignorable on to every contained expression
        for sub_expr in self.exprs:
            sub_expr.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}:({self.exprs})"

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()

        for sub_expr in self.exprs:
            sub_expr.streamline()

        def can_absorb(candidate) -> bool:
            # a nested expression of the same class may be merged only if it
            # carries no parse actions, results name, or debug flag
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)``
        # to ``And(a, b, c, d)`` (likewise for :class:`Or`'s and
        # :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if can_absorb(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            last = self.exprs[-1]
            if can_absorb(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self._defaultName = None
                self._may_return_empty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        trace_so_far = validateTrace[:] if validateTrace is not None else []
        trace = trace_so_far + [self]
        for sub_expr in self.exprs:
            sub_expr.validate(trace)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression, with each contained expression
        copied as well.

        Generally only used internally by pyparsing.
        """
        duplicate = typing.cast(ParseExpression, super().copy())
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        warning_enabled = (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        )

        if warning_enabled:
            # first contained expression that already has a results name and
            # has not suppressed this diagnostic
            colliding = next(
                (
                    e
                    for e in self.exprs
                    if isinstance(e, ParserElement)
                    and e.resultsName
                    and (
                        Diagnostics.warn_ungrouped_named_tokens_in_collection
                        not in e.suppress_warnings_
                    )
                ),
                None,
            )
            if colliding is not None:
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {colliding.resultsName!r} on contained expression"
                )
                warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4461

4462

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example:

    .. testcode::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element; ``And.parseImpl`` switches to raising
        ``ParseSyntaxException`` for the expressions that follow it.
        """

        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = []
        for item in exprs_arg:
            if isinstance(item, str):
                item = self._literalStringClass(item)
            exprs.append(item)

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            # each Ellipsis becomes a SkipTo aimed at the element after it
            exprs[:-1] = [
                SkipTo(following)("_skipped*") if current is Ellipsis else current
                for current, following in zip(exprs, exprs[1:])
            ]

        super().__init__(exprs, savelist)

        if not self.exprs:
            self._may_return_empty = True
        else:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            head = self.exprs[0]
            if isinstance(head, White):
                self.skipWhitespace = False
            else:
                # take the whitespace handling of the leading expression
                self.set_whitespace_chars(
                    head.whiteChars,
                    copy_defaults=head.copyDefaultWhiteChars,
                )
                self.skipWhitespace = head.skipWhitespace

        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """
        Collapse `And` expressions like `And(And(And(A, B), C), D)`
        to `And(A, B, C, D)`.

        .. doctest::

            >>> expr = Word("A") + Word("B") + Word("C") + Word("D")
            >>> # Using '+' operator creates nested And expression
            >>> expr
            {{{W:(A) W:(B)} W:(C)} W:(D)}
            >>> # streamline simplifies to a single And with multiple expressions
            >>> expr.streamline()
            {W:(A) W:(B) W:(C) W:(D)}

        Guards against collapsing out expressions that have special features,
        such as results names or parse actions.

        Resolves pending Skip commands defined using `...` terms.
        """

        def ends_with_pending_skip(candidate) -> bool:
            return bool(
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # collapse any _PendingSkip's
        if self.exprs and any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
            deleted_expr_marker = NoMatch()
            for idx, sub_expr in enumerate(self.exprs[:-1]):
                if sub_expr is deleted_expr_marker:
                    continue
                if ends_with_pending_skip(sub_expr):
                    # fold the following expression into the pending skip,
                    # and mark its old slot for removal
                    sub_expr.exprs[-1] = sub_expr.exprs[-1] + self.exprs[idx + 1]
                    self.exprs[idx + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # walk down cur and its first embedded expressions looking for an
            # IndentedBlock; ids already visited guard against recursive
            # grammars
            visited = set()
            node = cur
            while node is not None and id(node) not in visited:
                visited.add(id(node))
                if isinstance(node, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=node: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                node = next(iter(node.recurse()), None)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        # pass False as callPreParse arg to _parse for first element, since we
        # already pre-parsed the string as part of our And pre-parsing
        loc, collected = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )

        past_error_stop = False
        for sub_expr in self.exprs[1:]:
            # exact type test, not isinstance
            if type(sub_expr) is And._ErrorStop:
                past_error_stop = True
                continue

            if not past_error_stop:
                loc, sub_tokens = sub_expr._parse(instring, loc, do_actions)
            else:
                # after an error stop, failures are raised as
                # ParseSyntaxException
                try:
                    loc, sub_tokens = sub_expr._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )

            collected += sub_tokens

        return loc, collected

    def __iadd__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # And([self, other])
        return NotImplemented

    def _checkRecursion(self, parseElementList):
        chain = parseElementList[:] + [self]
        for sub_expr in self.exprs:
            sub_expr._checkRecursion(chain)
            # stop at the first expression that cannot match empty
            if not sub_expr.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(map(str, self.exprs))
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "{" + inner + "}"

4650

4651

4652class Or(ParseExpression):

4653 """Requires that at least one :class:`ParserElement` is found. If

4654 two expressions match, the expression that matches the longest

4655 string will be used. May be constructed using the ``'^'``

4656 operator.

4657

4658 Example:

4659

4660 .. testcode::

4661

4662 # construct Or using '^' operator

4663

4664 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))

4665 print(number.search_string("123 3.1416 789"))

4666

4667 prints:

4668

4669 .. testoutput::

4670

4671 [['123'], ['3.1416'], ['789']]

4672 """

4673

4674 def __init__(

4675 self, exprs: typing.Iterable[ParserElement], savelist: bool = False

4676 ) -> None:

4677 super().__init__(exprs, savelist)

4678 if self.exprs:

4679 self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)

4680 self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

4681 else:

4682 self._may_return_empty = True

4683

4684 def streamline(self) -> ParserElement:

4685 super().streamline()

4686 if self.exprs:

4687 self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)

4688 self.saveAsList = any(e.saveAsList for e in self.exprs)

4689 self.skipWhitespace = all(

4690 e.skipWhitespace and not isinstance(e, White) for e in self.exprs

4691 )

4692 else:

4693 self.saveAsList = False

4694 return self

4695

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """Match the alternative that consumes the most input.

    Every alternative is first tried with ``try_parse``. The ones that
    succeed are then re-parsed for real, longest trial match first, and the
    first whose real parse reaches at least as far as its trial is returned.
    If none does, the longest real parse is returned. When nothing matches,
    a collected ``ParseFatalException`` is raised in preference to the
    furthest ``ParseException``.
    """
    furthest_loc = -1
    furthest_exc = None
    trial_matches: list[tuple[int, ParserElement]] = []
    fatal_errors: list[ParseFatalException] = []

    if all(alt.callPreparse for alt in self.exprs):
        loc = self.preParse(instring, loc)

    # Phase 1: trial-parse every alternative from the same start location.
    for alt in self.exprs:
        try:
            trial_end = alt.try_parse(instring, loc, raise_fatal=True)
        except ParseFatalException as fatal:
            fatal.__traceback__ = None
            fatal.parser_element = alt
            fatal_errors.append(fatal)
            # a fatal error supersedes any ordinary failure recorded so far
            furthest_exc = None
            furthest_loc = -1
        except ParseException as exc:
            if not fatal_errors:
                exc.__traceback__ = None
                if exc.loc > furthest_loc:
                    furthest_exc = exc
                    furthest_loc = exc.loc
        except IndexError:
            if len(instring) > furthest_loc:
                furthest_exc = ParseException(
                    instring, len(instring), alt.errmsg, self
                )
                furthest_loc = len(instring)
        else:
            # remember every trial match, to retry longest to shortest
            trial_matches.append((trial_end, alt))

    # Phase 2: re-evaluate the trial matches in descending order of length,
    # since attached actions may change whether or how much they match.
    if trial_matches:
        trial_matches.sort(key=itemgetter(0), reverse=True)

        if not do_actions:
            # no conditions or parse actions can alter the outcome, so the
            # longest trial match is the answer
            best_expr = trial_matches[0][1]
            return best_expr._parse(instring, loc, do_actions)

        longest: tuple[int, typing.Optional[ParseResults]] = -1, None
        for trial_end, candidate in trial_matches:
            if trial_end <= longest[0]:
                # the remaining candidates cannot beat what we already hold
                return longest

            try:
                real_end, toks = candidate._parse(instring, loc, do_actions)
            except ParseException as exc:
                exc.__traceback__ = None
                if exc.loc > furthest_loc:
                    furthest_exc = exc
                    furthest_loc = exc.loc
            else:
                if real_end >= trial_end:
                    return real_end, toks
                # matched less than in the trial; keep it if it is the best so far
                elif real_end > longest[0]:
                    longest = real_end, toks

        if longest != (-1, None):
            return longest

    # Nothing matched: report the most relevant failure.
    if fatal_errors:
        if len(fatal_errors) > 1:
            fatal_errors.sort(key=lambda e: -e.loc)
            if fatal_errors[0].loc == fatal_errors[1].loc:
                # tie on location: prefer the element with the longer description
                fatal_errors.sort(
                    key=lambda e: (-e.loc, -len(str(e.parser_element)))
                )
        max_fatal = fatal_errors[0]
        raise max_fatal

    if furthest_exc is not None:
        # if every alternative failed right at the start position, emit this
        # expression's collective message instead of any single one
        parse_start_loc = self.preParse(instring, loc)
        if furthest_loc == parse_start_loc:
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    raise ParseException(instring, loc, "no defined alternatives to match", self)

4779

def __ixor__(self, other):
    """Implement ``self ^= other`` by appending ``other`` as an alternative.

    Strings are first wrapped with the configured literal class; any other
    non-``ParserElement`` operand yields ``NotImplemented``.
    """
    operand = other
    if isinstance(operand, str_type):
        operand = self._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return self.append(operand)  # Or([self, other])
    return NotImplemented

4786

def _generateDefaultName(self) -> str:
    """Return the alternatives joined by ``' ^ '`` and wrapped in braces."""
    joined = " ^ ".join(str(alt) for alt in self.exprs)
    return "{" + joined + "}"

4789

def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
    """Set the results name, warning first if an ``And`` alternative is present.

    The warning is emitted only when the
    ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled and
    is suppressed neither on this expression nor on the ``And`` alternative.
    """
    diag = Diagnostics.warn_multiple_tokens_in_named_alternation
    diag_active = (
        __diag__.warn_multiple_tokens_in_named_alternation
        and diag not in self.suppress_warnings_
    )
    if diag_active and any(
        isinstance(alt, And) and diag not in alt.suppress_warnings_
        for alt in self.exprs
    ):
        warning = (
            "warn_multiple_tokens_in_named_alternation:"
            f" setting results name {name!r} on {type(self).__name__} expression"
            " will return a list of all parsed tokens in an And alternative,"
            " in prior versions only the first token was returned; enclose"
            " contained argument in Group"
        )
        # warn() stays in this frame so that stacklevel=3 keeps its meaning
        warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

    return super()._setResultsName(name, list_all_matches)

4812

4813

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

        # watch the order of expressions to match
        >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        >>> print(number.search_string("123 3.1416 789")) # Fail!
        [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        >>> print(number.search_string("123 3.1416 789")) # Better
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Build the alternation and derive its empty-match / whitespace flags."""
        super().__init__(exprs, savelist)
        if not self.exprs:
            self._may_return_empty = True
            return

        # may match empty if ANY alternative can; skips whitespace only if ALL do
        self._may_return_empty = any(alt.mayReturnEmpty for alt in self.exprs)
        self.skipWhitespace = all(alt.skipWhitespace for alt in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then refresh the aggregate flags; returns ``self``."""
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        alternatives = self.exprs
        self.saveAsList = any(alt.saveAsList for alt in alternatives)
        self._may_return_empty = any(alt.mayReturnEmpty for alt in alternatives)
        # a White alternative disables leading-whitespace skipping for the whole expression
        self.skipWhitespace = all(
            alt.skipWhitespace and not isinstance(alt, White) for alt in alternatives
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` from any alternative propagates at once.
        If every alternative fails, the failure that got furthest into the
        input is raised.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as fatal:
                fatal.__traceback__ = None
                fatal.parser_element = alt
                raise
            except ParseException as exc:
                if exc.loc > furthest_loc:
                    furthest_exc = exc
                    furthest_loc = exc.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    furthest_loc = len(instring)

        if furthest_exc is not None:
            # if every alternative failed right at the start position, emit this
            # expression's collective message instead of any individual one
            parse_start_loc = self.preParse(instring, loc)
            if furthest_loc == parse_start_loc:
                furthest_exc.msg = self.errmsg or ""
            raise furthest_exc

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Implement ``self |= other`` by appending ``other`` as an alternative."""
        operand = other
        if isinstance(operand, str_type):
            operand = self._literalStringClass(operand)
        if isinstance(operand, ParserElement):
            return self.append(operand)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        """Return the alternatives joined by ``' | '`` and wrapped in braces."""
        joined = " | ".join(str(alt) for alt in self.exprs)
        return "{" + joined + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first if an ``And`` alternative is present."""
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        diag_active = (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
        )
        if diag_active and any(
            isinstance(alt, And) and diag not in alt.suppress_warnings_
            for alt in self.exprs
        ):
            warning = (
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group"
            )
            # warn() stays in this frame so that stacklevel=3 keeps its meaning
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4924

4925

class Each(ParseExpression):
    """Requires all given :class:`ParserElement` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example:

    .. testcode::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: 'BLACK'
        - posn: ['100', ',', '120']
          - x: '100'
          - y: '120'
        - shape: 'SQUARE'
        ...

        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
         'posn:', ['50', ',', '80']]
        - color: 'BLUE'
        - posn: ['50', ',', '80']
          - x: '50'
          - y: '80'
        - shape: 'CIRCLE'
        - size: '50'
        ...

        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
         'posn:', ['20', ',', '40']]
        - color: 'GREEN'
        - posn: ['20', ',', '40']
          - x: '20'
          - y: '40'
        - shape: 'TRIANGLE'
        - size: '20'
        ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        """Build the expression; grouping of sub-expressions is deferred to the first parse."""
        super().__init__(exprs, savelist)
        # the whole set may match empty only if every member can
        self._may_return_empty = (
            all(member.mayReturnEmpty for member in self.exprs)
            if self.exprs
            else True
        )
        self.skipWhitespace = True
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``self &= other`` by appending ``other`` to the set."""
        operand = other
        if isinstance(operand, str_type):
            operand = self._literalStringClass(operand)
        if isinstance(operand, ParserElement):
            return self.append(operand)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline via the base class and recompute the empty-match flag."""
        super().streamline()
        self._may_return_empty = (
            all(member.mayReturnEmpty for member in self.exprs)
            if self.exprs
            else True
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match all required sub-expressions, in whatever order they occur.

        On first use the sub-expressions are sorted into required, optional
        and repeatable groups. A trial pass then repeatedly tries everything
        still outstanding, recording the order in which things match, until a
        full round matches nothing. Finally the matched expressions are parsed
        for real in that order and their results concatenated.

        Raises a ``ParseFatalException`` seen in the last trial round, or a
        ``ParseException`` naming the required elements that never matched.
        """
        if self.initExprGroups:
            members = self.exprs
            # map each Opt's inner expression back to the Opt wrapper
            self.opt1map = {
                id(member.expr): member
                for member in members
                if isinstance(member, Opt)
            }
            explicit_optionals = [
                member.expr for member in members if isinstance(member, Opt)
            ]
            implicit_optionals = [
                member
                for member in members
                if member.mayReturnEmpty
                and not isinstance(member, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = explicit_optionals + implicit_optionals
            self.multioptionals = [
                member.expr.set_results_name(
                    member.resultsName, list_all_matches=True
                )
                for member in members
                if isinstance(member, _MultipleMatch)
            ]
            self.multirequired = [
                member.expr.set_results_name(
                    member.resultsName, list_all_matches=True
                )
                for member in members
                if isinstance(member, OneOrMore)
            ]
            self.required = [
                member
                for member in members
                if not isinstance(member, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        trial_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatables = self.multioptionals[:]
        match_order: list[ParserElement] = []

        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []
        while True:
            round_exprs = pending_required + pending_optional + repeatables
            failed.clear()
            fatals.clear()
            for candidate in round_exprs:
                try:
                    trial_loc = candidate.try_parse(
                        instring, trial_loc, raise_fatal=True
                    )
                except ParseFatalException as fatal:
                    fatal.__traceback__ = None
                    fatal.parser_element = candidate
                    fatals.append(fatal)
                    failed.append(candidate)
                except ParseException:
                    failed.append(candidate)
                else:
                    match_order.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            # stop once a whole round made no progress
            if len(failed) == len(round_exprs):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the element with the longer description
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if pending_required:
            missing = ", ".join([str(req) for req in pending_required])
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            member
            for member in self.exprs
            if isinstance(member, Opt) and member.expr in pending_optional
        ]

        total_results = ParseResults([])
        for matched in match_order:
            loc, results = matched._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        """Return the members joined by ``' & '`` and wrapped in braces."""
        joined = " & ".join(str(member) for member in self.exprs)
        return "{" + joined + "}"

5109

5110

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        """Wrap ``expr``, converting a string to a literal expression first.

        When ``expr`` is not ``None``, this element copies its whitespace,
        list-saving, pre-parse and ignore settings from it.
        """
        super().__init__(savelist)
        if isinstance(expr, str_type):
            expr_str = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(expr_str)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(expr_str)
            else:
                expr = literal_cls(Literal(expr_str))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the contained expression in a list, or an empty list if there is none."""
        if self.expr is None:
            return []
        return [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate to the contained expression without pre-parsing again.

        ``ParseSyntaxException`` passes through untouched. Other parse
        exceptions get their missing ``pstr``/``loc``/``parser_element``
        filled in, and take this element's ``errmsg`` when it is a named
        non-``Forward`` element with a non-empty message.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            named_non_forward = (
                not isinstance(self, Forward) and self.customName is not None
            )
            if named_non_forward and self.errmsg:
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        the contained expression.
        """
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy of the contained expression
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        the contained expression.
        """
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # work on a copy of the contained expression
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            if self.expr is not None:
                # pass on the entry that was just added to our own ignore list
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        """Streamline this element and then its contained expression."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element is already on the path."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        path_with_self = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(path_with_self)

    def validate(self, validateTrace=None) -> None:
        """Deprecated: warn, validate the contained expression, then check recursion."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []
        trace_with_self = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace_with_self)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``<ClassName>:(<contained expression>)``."""
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5232

5233

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).

    Example:

    .. testcode::

        '''
        BNF:
        statement ::= assignment_stmt | if_stmt
        assignment_stmt ::= identifier '=' rvalue
        rvalue ::= identifier | integer
        if_stmt ::= 'if' bool_condition block
        block ::= ([indent] statement)...
        identifier ::= [A..Za..z]
        integer ::= [0..9]...
        bool_condition ::= 'TRUE' | 'FALSE'
        '''

        IF, TRUE, FALSE = Keyword.using_each("IF TRUE FALSE".split())

        statement = Forward()
        identifier = Char(alphas)
        integer = Word(nums).add_parse_action(lambda t: int(t[0]))
        rvalue = identifier | integer
        assignment_stmt = identifier + "=" + rvalue

        if_stmt = IF + (TRUE | FALSE) + IndentedBlock(statement)

        statement <<= Group(assignment_stmt | if_stmt)

        result = if_stmt.parse_string('''
            IF TRUE
                a = 1000
                b = 2000
                IF FALSE
                    z = 100
            ''')
        print(result.dump())

    .. testoutput::

        ['IF', 'TRUE', [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]]
        [0]:
          IF
        [1]:
          TRUE
        [2]:
          [['a', '=', 1000], ['b', '=', 2000], ['IF', 'FALSE', [['z', '=', 100]]]]
          [0]:
            ['a', '=', 1000]
          [1]:
            ['b', '=', 2000]
          [2]:
            ['IF', 'FALSE', [['z', '=', 100]]]
            [0]:
              IF
            [1]:
              FALSE
            [2]:
              [['z', '=', 100]]
              [0]:
                ['z', '=', 100]
    """

    class _Indent(Empty):
        """Zero-width match that succeeds only at exactly column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Zero-width match that succeeds only at a column beyond ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        """Store the block options; ``parent_anchor`` starts at column 1."""
        super().__init__(expr, savelist=True)
        # if recursive:
        #     raise NotImplementedError("IndentedBlock with recursive is not implemented")
        self._recursive = recursive
        self._grouped = grouped
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse one or more ``self.expr`` matches that share one indent column.

        The indent column is that of the first non-whitespace position at or
        after ``loc``. A fresh block expression is built on every call and
        parsed starting from that position.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        # group the block's tokens only when requested
        block_expr = Group(block) if self._grouped else block
        return (block_expr + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, do_actions
        )

5357

5358

class AtStringStart(ParseElementEnhance):
    """Matches if expression matches at the beginning of the parse
    string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string(" 123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        """Wrap ``expr`` and turn off pre-parsing for this element."""
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression, but only at location 0."""
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

5378

5379

class AtLineStart(ParseElementEnhance):
    r"""Matches if an expression matches at the beginning of a line within
    the parse string

    Example:

    .. testcode::

        test = '''\
        BBB this line
        BBB and this line
          BBB but not this one
        A BBB and definitely not this one
        '''

        for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['BBB', ' this line']
        ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        """Wrap ``expr`` and turn off pre-parsing for this element."""
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression, but only when ``loc`` is in column 1."""
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

5414

5415

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example:

    .. testcode::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(
            label + Suppress(':')
            + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
        )

        attr_expr[1, ...].parse_string(
            "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        """Wrap ``expr``; a lookahead is always flagged as possibly matching empty."""
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the lookahead at ``loc`` and return ``loc`` unchanged.

        The list contents of the lookahead's results are deleted; the
        results object itself is returned so that named results survive.
        """
        # parse with the contained expression, then empty the returned
        # ParseResults list in place so only named results remain
        _, lookahead_results = self.expr._parse(instring, loc, do_actions=do_actions)
        del lookahead_results[:]

        return loc, lookahead_results

5458

5459

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        """Wrap ``expr`` and work out how far back the lookbehind must reach.

        For strings, literals, keywords, length-bounded words and position
        tokens the retreat distance is derived from the expression and the
        lookbehind is marked exact; otherwise the caller's ``retreat`` is
        used as a maximum window.
        """
        super().__init__(expr)
        # use a whitespace-preserving copy of the wrapped expression
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False

        exact = True
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            exact = False
        self.exact = exact
        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # always hand back an empty token list
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        """Check that the expression matches just before ``loc``; ``loc`` is returned unchanged.

        In exact mode a single parse is attempted ``retreat`` characters back.
        Otherwise increasing offsets inside the lookbehind window are tried,
        each required to match up to the end of the window, and the last
        failure is raised if none succeeds.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            lookbehind_start = loc - self.retreat
            _, ret = self.expr._parse(instring, lookbehind_start)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_failure: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )

        # NOTE(review): the largest offset can exceed len(window) by one, giving
        # a start position of -1 - confirm this is intended.
        max_offset = min(loc, self.retreat + 1)
        for offset in range(1, max_offset + 1):
            try:
                _, ret = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                last_failure = pbe
            else:
                return loc, ret

        raise last_failure

5540

5541

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse the contained expression and return ``[start, tokens, end]``.

        The three parts are also stored under ``locn_start``, ``value`` and
        ``locn_end``.
        """
        begin = loc
        end, tokens = self.expr._parse(instring, begin, do_actions, callPreParse=False)
        located = ParseResults([begin, tokens, end])
        located["locn_start"] = begin
        located["value"] = tokens
        located["locn_end"] = end
        if self.resultsName:
            # must return as a list, so that the name will be attached to the complete group
            return end, [located]
        return end, located

5585

5586

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        """Wrap ``expr`` as a negative lookahead that does not skip whitespace."""
        super().__init__(expr)
        # do NOT use self.leave_whitespace(), don't want to propagate to exprs
        # self.leave_whitespace()
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Fail if the contained expression can parse at ``loc``; else return no tokens."""
        unwanted_present = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        """Return the contained expression in braces, prefixed with ``~``."""
        return "~{" + str(self.expr) + "}"

5630

5631

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation for repetition of a contained expression."""

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        """Wrap ``expr`` and register an optional stop-on sentinel.

        The legacy ``stopOn`` keyword is taken from ``kwargs`` via
        ``deprecate_argument`` and is preferred over ``stop_on`` when truthy.
        """
        stopOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "stopOn", None
        )

        super().__init__(expr)
        self.saveAsList = True
        ender = stopOn or stop_on
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.stopOn(ender)

    def stop_on(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that ends the repetition."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated: the repetition goes on only while the sentinel is absent
        self.not_ender = None if ender is None else ~ender
        return self

    stopOn = stop_on

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the contained expression once, then as often as possible.

        The first match is mandatory and its failure propagates. Later
        failures (``ParseException`` or ``IndexError``) simply end the
        repetition. When a sentinel is set it is checked before every match.
        """
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        # bound try_parse of the negated sentinel, or None if no sentinel is set
        try_not_ender = None
        if self.not_ender is not None:
            try_not_ender = self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if try_not_ender is not None:
            try_not_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, do_actions)
        try:
            has_ignore_exprs = bool(self.ignoreExprs)
            while True:
                if try_not_ender is not None:
                    try_not_ender(instring, loc)
                if has_ignore_exprs:
                    preloc = skip_ignorables(instring, loc)
                else:
                    preloc = loc
                loc, more_tokens = parse_expr(instring, preloc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the repetition ends at the first failure after the required match
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, warning first about a named contained expression.

        The warning needs the ``warn_ungrouped_named_tokens_in_collection``
        diagnostic to be enabled and unsuppressed, and is emitted at most once.
        """
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for contained in [self.expr] + self.expr.recurse():
                if not isinstance(contained, ParserElement):
                    continue
                if contained.resultsName and diag not in contained.suppress_warnings_:
                    warning = (
                        "warn_ungrouped_named_tokens_in_collection:"
                        f" setting results name {name!r} on {type(self).__name__} expression"
                        f" collides with {contained.resultsName!r} on contained expression"
                    )
                    # warn() stays in this frame so that stacklevel=3 keeps its meaning
                    warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
                    break

        return super()._setResultsName(name, list_all_matches)

5712

5713

class OneOrMore(_MultipleMatch):
    """
    Match the given expression one or more times.

    Parameters:

    - ``expr`` - expression that must match at least once
    - ``stop_on`` - (default= ``None``) - expression acting as a terminating
      sentinel (only needed when the sentinel would otherwise be matched by
      the repeated expression itself)

    Example:

    .. doctest::

       >>> data_word = Word(alphas)
       >>> label = data_word + FollowedBy(':')
       >>> attr_expr = Group(
       ...     label + Suppress(':')
       ...     + OneOrMore(data_word).set_parse_action(' '.join))

       >>> text = "shape: SQUARE posn: upper left color: BLACK"

       # Fail! read 'posn' as data instead of next label
       >>> attr_expr[1, ...].parse_string(text).pprint()
       [['shape', 'SQUARE posn']]

       # use stop_on attribute for OneOrMore
       # to avoid reading label string as part of the data
       >>> attr_expr = Group(
       ...     label + Suppress(':')
       ...     + OneOrMore(
       ...         data_word, stop_on=label).set_parse_action(' '.join))
       >>> OneOrMore(attr_expr).parse_string(text).pprint()  # Better
       [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

       # could also be written as
       >>> (attr_expr * (1,)).parse_string(text).pprint()
       [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        """Return the default display name, ``{expr}...``."""
        repeated = self.expr
        return f"{{{repeated}}}..."

5757

5758

class ZeroOrMore(_MultipleMatch):
    """
    Match the given expression any number of times, including not at all.

    Parameters:

    - ``expr`` - expression that may match zero or more times
    - ``stop_on`` - (default= ``None``) - expression acting as a terminating
      sentinel (only needed when the sentinel would otherwise be matched by
      the repeated expression itself)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        """
        :param expr: expression to repeat
        :param stop_on: optional sentinel expression or literal string
        :param kwargs: may carry the deprecated ``stopOn`` spelling of
            ``stop_on``; when both are given, a truthy ``stopOn`` wins
        """
        legacy_stop_on: Union[ParserElement, str] = deprecate_argument(
            kwargs, "stopOn", None
        )
        sentinel = legacy_stop_on or stop_on

        super().__init__(expr, stop_on=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Delegate to the one-or-more logic; on failure return an empty
        result at the original location instead of raising.
        """
        try:
            outcome = super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            return loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self) -> str:
        """Return the default display name, ``[expr]...``."""
        repeated = self.expr
        return f"[{repeated}]..."

5792

5793

class DelimitedList(ParseElementEnhance):
    """Helper to define a list of expressions separated by a delimiter
    (``','`` unless another is given).

    By default whitespace and comments may appear between the elements and
    the delimiters, the delimiters are suppressed, and the matches come back
    as a list of tokens.  Passing ``combine=True`` changes this: the matched
    text, delimiters included, is returned as one single token string.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example:

    .. doctest::

       >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
       ParseResults(['aa', 'bb', 'cc'], {})
       >>> DelimitedList(Word(hexnums), delim=':', combine=True
       ...               ).parse_string("AA:BB:CC:DD:EE")
       ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        """
        :param expr: expression (or literal string) for one list element
        :param delim: delimiter expression or string
        :param combine: return the whole match as a single string token
        :param min: minimum number of elements; must be at least 1 when given
        :param max: maximum number of elements; must not be below ``min``
        :param allow_trailing_delim: accept one delimiter after the last element
        :raises ValueError: if ``min`` is below 1, or ``max`` is below ``min``
        """
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are kept in the output only when combining
        if combine or isinstance(delim, Suppress):
            self.delim = delim
        else:
            self.delim = Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # first element, followed by (delimiter + element) repeated within
        # bounds that are one less than the requested element counts
        extra_min = self.min - 1
        extra_max = None if self.max is None else self.max - 1
        delim_list_expr = self.content + (self.delim + self.content) * (
            extra_min,
            extra_max,
        )
        if self.allow_trailing_delim:
            delim_list_expr += Opt(self.delim)

        if self.combine:
            delim_list_expr = Combine(delim_list_expr)

        super().__init__(delim_list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        """Return the default display name, ``content [delim content]...``."""
        element = self.content.streamline()
        return f"{element} [{self.raw_delim} {element}]..."

5865

5866

5867class _NullToken:

5868 def __bool__(self):

5869 return False

5870

5871 def __str__(self):

5872 return ""

5873

5874

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    :param expr: expression that may match zero or one time
    :param default: (optional) - value to be returned
       if the optional expression is not found.

    Example:

    .. testcode::

       # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
       zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
       zip.run_tests('''
           # traditional ZIP code
           12345

           # ZIP+4 form
           12101-0001

           # invalid ZIP
           98765-
           ''')

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # traditional ZIP code
       12345
       ['12345']

       # ZIP+4 form
       12101-0001
       ['12101-0001']

       # invalid ZIP
       98765-
       98765-
            ^
       ParseException: Expected end of text, found '-' (at char 5), (line:1, col:6)
       FAIL: Expected end of text, found '-' (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default supplied" (so that None is usable as a default)
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        """
        :param expr: expression (or literal string) to match optionally
        :param default: value to emit as a token when ``expr`` does not match;
            when omitted, no token is emitted
        """
        super().__init__(expr, savelist=False)
        self._may_return_empty = True
        self.defaultValue = default
        self.saveAsList = self.expr.saveAsList

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try the contained expression; if it fails, return the default
        value (when one was supplied) or no tokens, at the unchanged location.
        """
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(
                instring, loc, do_actions, callPreParse=False
            )
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []  # type: ignore[assignment]
            elif wrapped.resultsName:
                # keep the default reachable under the contained expression's name
                tokens = ParseResults([fallback])
                tokens[wrapped.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Return the default display name, ``[expr]``, with any redundant
        enclosing ``{}`` pairs of the inner name removed.
        """
        text = str(self.expr)
        while len(text) > 1 and text[0] == "{" and text[-1] == "}":
            text = text[1:-1]
        return f"[{text}]"

# ``Optional`` is a second module-level name bound to the same class as ``Opt``
# (presumably retained for backward compatibility - confirm before removing).
Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param expr: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
       (the skipped text and target expression are returned
       as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
       (typically quoted strings and comments)
       that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
       are not allowed to be included in the skipped text;
       if found before the target expression is found,
       the :class:`SkipTo` is not a match

    Example:

    .. testcode::

       report = '''
           Outstanding Issues Report - 1 Jan 2000

              # | Severity | Description                               |  Days Open
           -----+----------+-------------------------------------------+-----------
            101 | Critical | Intermittent system crash                 |          6
             94 | Cosmetic | Spelling error on Login ('log|n')         |         14
             79 | Minor    | System slow when running too many reports |         47
           '''
       integer = Word(nums)
       SEP = Suppress('|')
       # use SkipTo to simply match everything up until the next SEP
       # - ignore quoted strings, so that a '|' character inside a quoted string does not match
       # - parse action will call token.strip() for each matched token, i.e., the description body
       string_data = SkipTo(SEP, ignore=quoted_string)
       string_data.set_parse_action(token_map(str.strip))
       ticket_expr = (integer("issue_num") + SEP
                     + string_data("sev") + SEP
                     + string_data("desc") + SEP
                     + integer("days_open"))

       for tkt in ticket_expr.search_string(report):
           print(tkt.dump())

    prints:

    .. testoutput::

       ['101', 'Critical', 'Intermittent system crash', '6']
       - days_open: '6'
       - desc: 'Intermittent system crash'
       - issue_num: '101'
       - sev: 'Critical'
       ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
       - days_open: '14'
       - desc: "Spelling error on Login ('log|n')"
       - issue_num: '94'
       - sev: 'Cosmetic'
       ['79', 'Minor', 'System slow when running too many reports', '47']
       - days_open: '47'
       - desc: 'System slow when running too many reports'
       - issue_num: '79'
       - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        **kwargs,
    ) -> None:
        """
        :param other: target expression (or literal string) to skip to
        :param include: also parse and return the target expression
        :param ignore: expression whose matches are stepped over while scanning
        :param fail_on: expression that stops the scan when it matches
        :param kwargs: may carry the deprecated ``failOn`` spelling of
            ``fail_on``; when both are given, a truthy ``failOn`` wins
        """
        failOn: typing.Optional[Union[ParserElement, str]] = deprecate_argument(
            kwargs, "failOn", None
        )

        super().__init__(other)
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper element whose only job is to carry the ignore expressions
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        """Rebuild the internal ignorer from the target expression's current
        ignore expressions plus the ``ignore`` argument given at construction.
        """
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr):
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.

        :returns: ``self``, so that calls can be chained
        """
        super().ignore(expr)
        self._update_ignorer()
        # Return self (previously this fell through and returned None), so
        # that fluent chaining keeps working, as it does for Combine.ignore.
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        :returns: ``(new_loc, results)`` where ``results`` holds the skipped
            text and, if ``include`` was requested, the target's tokens
        :raises ParseException: if the end of the input is reached without
            the target expression matching
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.can_parse_next if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # stop scanning if the failOn expression matches here; note
                # that leaving via break bypasses the while's else clause
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while True:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real, this time honoring do_actions
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

6125

6126

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    instance using the ``'<<'`` operator.

    .. Note::

       Take care when assigning to ``Forward`` not to overlook
       precedence of operators.

       Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

          fwd_expr << a | b | c

       will actually be evaluated as::

          (fwd_expr << a) | b | c

       thereby leaving b and c out as parseable alternatives.
       It is recommended that you explicitly group the values
       inserted into the :class:`Forward`::

          fwd_expr << (a | b | c)

       Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :meth:`ParseResults.pprint` for an example of a recursive
    parser created using :class:`Forward`.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        """
        :param other: optional expression to wrap right away; normally left
            as ``None`` and attached later with ``<<=`` or ``<<``
        """
        # remember where this Forward was created, for the warning in __del__
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and copy its parsing
        attributes onto this ``Forward``.

        :returns: ``self``, or ``NotImplemented`` if ``other`` is neither a
            string nor a :class:`ParserElement`
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Drop caller_frame only once the operand has been accepted: a
        # rejected operand leaves self.expr as None, and __del__ reads
        # caller_frame in exactly that state.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # record the calling line so that __or__ can detect 'fwd << a | b'
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        """Implement ``fwd <<= expr`` in terms of ``<<``."""
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        """Build ``self | other``, warning if this happens on the same source
        line as the ``<<`` assignment (diagnostic
        ``warn_on_match_first_with_lshift_operator``).
        """
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                PyparsingDiagnosticWarning,
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse using the attached expression, applying the bounded
        left-recursion algorithm when left recursion is enabled.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                PyparsingDiagnosticWarning,
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class.

        Only this element's own ``skipWhitespace`` flag is cleared here;
        ``recursive`` is accepted but not used by this override.
        """
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class.

        Only this element's own ``skipWhitespace`` flag is set here;
        ``recursive`` is accepted but not used by this override.
        """
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        """Streamline the contained expression once; returns ``self``."""
        if not self.streamlined:
            # set the flag before descending, so a self-referencing grammar
            # does not recurse back into this element
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a :class:`PyparsingDeprecationWarning` and then
        runs the legacy recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            PyparsingDeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        """Return ``"<ClassName>: <contained expression>"``, with the
        contained expression's text truncated to 1000 characters.
        """
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."
        finally:
            # always put the saved name back, even if something other than
            # an Exception (e.g. KeyboardInterrupt) escapes from str()
            self._defaultName = save_default_name

        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        """
        Returns a copy of this expression.

        Generally only used internally by pyparsing.
        """
        if self.expr is not None:
            return super().copy()
        else:
            # nothing attached yet: return a new Forward that wraps this one
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Assign a results name, warning first if no expression has been
        attached yet (diagnostic ``warn_name_set_on_empty_Forward``).
        """
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, PyparsingDiagnosticWarning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

6410

6411

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, serving as the base
    for classes that convert parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        """
        :param expr: expression whose results are to be converted
        :param savelist: accepted, but not passed on to the base class
        """
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

6420

6421

class Combine(TokenConverter):
    """Converter that concatenates all matching tokens into a single string.

    By default the matched pieces must also be contiguous in the input
    string; pass ``adjacent=False`` to the constructor to lift that
    requirement.

    Example:

    .. doctest::

       >>> real = Word(nums) + '.' + Word(nums)
       >>> print(real.parse_string('3.1416'))
       ['3', '.', '1416']

       >>> # will also erroneously match the following
       >>> print(real.parse_string('3. 1416'))
       ['3', '.', '1416']

       >>> real = Combine(Word(nums) + '.' + Word(nums))
       >>> print(real.parse_string('3.1416'))
       ['3.1416']

       >>> # no match when there are internal spaces
       >>> print(real.parse_string('3. 1416'))
       Traceback (most recent call last):
       ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        """
        :param expr: expression whose tokens are to be joined
        :param join_string: separator handed to the token-to-string conversion
        :param adjacent: require the matched pieces to be contiguous
        :param joinString: alternate spelling of ``join_string``; takes
            precedence whenever it is not ``None``
        """
        super().__init__(expr)
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but
        # re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """
        Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            # call the ParserElement implementation directly, bypassing the
            # overrides of the intermediate base classes
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the parsed tokens with one token holding their joined text."""
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

6491

6492

class Group(TokenConverter):
    """Converter that returns the matched tokens as a nested list - handy
    for the tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    When the optional ``aslist`` argument is True, the tokens come back as a
    Python list rather than as a pyparsing ParseResults.

    Example:

    .. doctest::

       >>> ident = Word(alphas)
       >>> num = Word(nums)
       >>> term = ident | num
       >>> func = ident + Opt(DelimitedList(term))
       >>> print(func.parse_string("fn a, b, 100"))
       ['fn', 'a', 'b', '100']

       >>> func = ident + Group(Opt(DelimitedList(term)))
       >>> print(func.parse_string("fn a, b, 100"))
       ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        """
        :param expr: expression whose tokens are to be grouped
        :param aslist: return the group as a Python list
        """
        super().__init__(expr)
        self._asPythonList = aslist
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the parsed tokens in an enclosing list (or a
        ``ParseResults.List`` when ``aslist`` was requested).
        """
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.as_list()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)

6530

6531

class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also
    as a dictionary: each element can additionally be looked up using the
    first token of that element as its key.  Useful for scraping tabular
    reports in which the first column can serve as an item key.

    When the optional ``asdict`` argument is True, the tokens come back as a
    Python dict rather than as a pyparsing ParseResults.

    Example:

    .. doctest::

       >>> data_word = Word(alphas)
       >>> label = data_word + FollowedBy(':')

       >>> attr_expr = (
       ...     label + Suppress(':')
       ...     + OneOrMore(data_word, stop_on=label)
       ...     .set_parse_action(' '.join)
       ... )

       >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

       >>> # print attributes as plain groups
       >>> print(attr_expr[1, ...].parse_string(text).dump())
       ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

       # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
       # Dict will auto-assign names.
       >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
       >>> print(result.dump())
       [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
       - color: 'light blue'
       - posn: 'upper left'
       - shape: 'SQUARE'
       - texture: 'burlap'
       [0]:
         ['shape', 'SQUARE']
       [1]:
         ['posn', 'upper left']
       [2]:
         ['color', 'light blue']
       [3]:
         ['texture', 'burlap']

       # access named fields as dict entries, or output as dict
       >>> print(result['shape'])
       SQUARE
       >>> print(result.as_dict())
       {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        """
        :param expr: repetitive expression whose elements supply key and value
        :param asdict: return the results as a Python dict
        """
        super().__init__(expr)
        self._asPythonDict = asdict
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register every non-empty element of ``tokenlist`` under a key
        taken from that element's first token.

        :raises TypeError: if an element cannot be copied in order to
            extract its values
        """
        for position, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if size == 1:
                # a key with nothing after it maps to the empty string
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()  # ParseResults(i)
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                is_single_plain_value = len(remainder) == 1 and not (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder[0] if is_single_plain_value else remainder

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

6630

6631

class Suppress(TokenConverter):
    """Converter that matches its expression but contributes no tokens
    to the parsed results.

    Example:

    .. doctest::

        >>> source = "a, b, c,d"
        >>> wd = Word(alphas)
        >>> wd_list1 = wd + (',' + wd)[...]
        >>> print(wd_list1.parse_string(source))
        ['a', ',', 'b', ',', 'c', ',', 'd']

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        >>> wd_list2 = wd + (Suppress(',') + wd)[...]
        >>> print(wd_list2.parse_string(source))
        ['a', 'b', 'c', 'd']

        # Skipped text (using '...') can be suppressed as well
        >>> source = "lead in START relevant text END trailing text"
        >>> start_marker = Keyword("START")
        >>> end_marker = Keyword("END")
        >>> find_body = Suppress(...) + start_marker + ... + end_marker
        >>> print(find_body.parse_string(source))
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` is a placeholder: wrap a pending skip that is
        # resolved once an expression is added to / subtracted from it.
        # ``savelist`` is accepted but not used by this initializer.
        wrapped = _PendingSkip(NoMatch()) if expr is ... else expr
        super().__init__(wrapped)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # the skip target is now known: suppress everything up to ``other``
        skip_ahead = Suppress(SkipTo(other))
        return skip_ahead + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # same as ``__add__``, but combined with ``-``
        skip_ahead = Suppress(SkipTo(other))
        return skip_ahead - other

    def postParse(self, instring, loc, tokenlist):
        # discard whatever was matched
        return []

    def suppress(self) -> ParserElement:
        # already suppressed - nothing more to wrap
        return self

6684

6685

6686# XXX: Example needs to be re-done for updated output

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator that reports every call of a parse action on ``sys.stderr``.

    Before the wrapped action runs, a line of the form
    ``">>entering <name>(line: <current_source_line>, <parse_location>, <matched_tokens>)"``
    is written. Afterwards a ``"<<leaving <name> ..."`` line is written,
    showing either the returned value or the type and message of the
    exception that was raised (the exception is then re-raised).

    Example:

    .. testsetup:: stderr

       import sys
       sys.stderr = sys.stdout

    .. testcleanup:: stderr

       sys.stderr = sys.__stderr__

    .. testcode:: stderr

       wd = Word(alphas)

       @trace_parse_action
       def remove_duplicate_chars(tokens):
           return ''.join(sorted(set(''.join(tokens))))

       wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
       print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
       :options: +NORMALIZE_WHITESPACE

       >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
        0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
       <<leaving remove_duplicate_chars (ret: 'dfjkls')
       ['dfjkls']

    .. versionchanged:: 3.1.0
       Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        if len(paArgs) > 3:
            # more than (s, l, t): qualify the name with the type of the
            # leading argument
            label = f"{type(paArgs[0]).__name__}.{label}"

        # the last three arguments are always string, location, tokens
        s, l, t = paArgs[-3:]
        sys.stderr.write(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")

        try:
            result = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(
                f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n"
            )
            raise
        else:
            sys.stderr.write(f"<<leaving {label} (ret: {result!r})\n")
            return result

    # keep the traced action recognizable by its original name
    z.__name__ = f.__name__
    return z

6750

6751

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Grammar for the regex-style ``[...]`` character sets accepted by srange().

# backslash + punctuation character -> the punctuation character itself
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# ``\x##`` / ``\0x##`` (either case of ``x``) -> the character with that hex
# code. The digits are everything after the first ``x``; ``str.lstrip`` must
# not be used for this, since it strips a *set of characters* and would also
# consume leading ``0`` digits of the value (``\x00``) while leaving an
# upper-case ``X`` in place.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# ``\0##`` -> the character with that octal code (only the backslash is
# dropped before conversion)
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# one set member: any of the escapes above, or a single character other than
# a backslash or the closing bracket
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# ``a-z`` style range, grouped as [first, last] with the dash dropped
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the whole bracket expression: optional ``^`` stored as "negate", and the
# members stored as "body"
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6778

6779

def srange(s: str) -> str:
    r"""Expand a regex-style ``'[]'`` character set into the plain string
    of characters it denotes, for use in :class:`Word` construction::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s. The values enclosed in the
    []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, an empty string is returned.
    """

    def _expand(item):
        # single characters pass through; a grouped pair is a range
        if not isinstance(item, ParseResults):
            return item
        first, last = ord(item[0]), ord(item[1])
        return "".join(map(chr, range(first, last + 1)))

    try:
        members = _reBracketExpr.parse_string(s).body
        return "".join(_expand(item) for item in members)
    except Exception:
        # best effort: any failure yields an empty character set
        return ""

6819

6820

def token_map(func, *args) -> ParseAction:
    """Build a parse action that applies ``func`` to every token of a
    :class:`ParseResults` list and returns the list of results.

    Any extra positional ``args`` are passed to ``func`` after the token,
    as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed text to an integer using base 16.

    The returned parse action is named after ``func`` (or after its class
    when ``func`` has no ``__name__``).

    Example (compare the last to the example in
    :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # name the action after the mapped callable
    try:
        action_name = func.__name__
    except AttributeError:
        action_name = func.__class__.__name__

    def pa(s, l, t):
        return [func(token, *args) for token in t]

    pa.__name__ = action_name
    return pa

6865

6866

def autoname_elements() -> None:
    """
    Give every :class:`ParserElement` found among the caller's local
    variables the name of the variable it is bound to, unless the element
    already has a custom name. Intended for generating railroad diagrams
    with named subdiagrams.

    Does nothing when the caller's frame cannot be obtained.
    """
    # guard against _getframe not being implemented in the current Python
    if not hasattr(sys, "_getframe"):
        return

    caller = sys._getframe(1)
    if caller is None:
        return

    caller = typing.cast(types.FrameType, caller)
    for var_name, value in caller.f_locals.items():
        if not isinstance(value, ParserElement):
            continue
        # leave explicitly named elements alone
        if value.customName:
            continue
        value.set_name(var_name)

6885

6886

# Regex bodies for quoted strings: the opening quote followed by any run of
# ordinary characters, quote escapes, and backslash escapes. The closing
# quote is matched separately by each expression below.
_dbl_quoted_body = r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*'
_sgl_quoted_body = r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*"

# Python-style variants: a quote is escaped with a backslash rather than by
# doubling it, and triple-quoted forms are supported.
_py_triple_dbl_quoted_body = r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*'
_py_triple_sgl_quoted_body = r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*"
_py_dbl_quoted_body = r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*'
_py_sgl_quoted_body = r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*"

dbl_quoted_string = Combine(Regex(_dbl_quoted_body) + '"').set_name(
    "string enclosed in double quotes"
)

sgl_quoted_string = Combine(Regex(_sgl_quoted_body) + "'").set_name(
    "string enclosed in single quotes"
)

quoted_string = Combine(
    (Regex(_dbl_quoted_body) + '"').set_name("double quoted string")
    | (Regex(_sgl_quoted_body) + "'").set_name("single quoted string")
).set_name("quoted string using single or double quotes")

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
python_quoted_string = Combine(
    (Regex(_py_triple_dbl_quoted_body, flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (Regex(_py_triple_sgl_quoted_body, flags=re.MULTILINE) + "'''").set_name(
        "multiline single quoted string"
    )
    ^ (Regex(_py_dbl_quoted_body) + '"').set_name("double quoted string")
    ^ (Regex(_py_sgl_quoted_body) + "'").set_name("single quoted string")
).set_name("Python quoted string")

unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# character sets expanded from hex-escaped ranges by srange()
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: list[ParserElement] = [
    candidate for candidate in vars().values() if isinstance(candidate, ParserElement)
]

# Compatibility synonyms
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on

6947# fmt: on