Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/core.py: 43%

2# core.py

4from __future__ import annotations

6import collections.abc

7from collections import deque

8import os

9import typing

10from typing import (

11 Any,

12 Callable,

13 Generator,

14 NamedTuple,

15 Sequence,

16 TextIO,

17 Union,

18 cast,

19)

20from abc import ABC, abstractmethod

21from enum import Enum

22import string

23import copy

24import warnings

25import re

26import sys

27from collections.abc import Iterable

28import traceback

29import types

30from operator import itemgetter

31from functools import wraps

32from threading import RLock

33from pathlib import Path

35from .util import (

36 _FifoCache,

37 _UnboundedCache,

38 __config_flags,

39 _collapse_string_to_ranges,

40 _escape_regex_range_chars,

41 _flatten,

42 LRUMemo as _LRUMemo,

43 UnboundedMemo as _UnboundedMemo,

44 replaced_by_pep8,

45)

46from .exceptions import *

47from .actions import *

48from .results import ParseResults, _ParseResultsWithOffset

49from .unicode import pyparsing_unicode

51_MAX_INT = sys.maxsize

52str_type: tuple[type, ...] = (str, bytes)

54#

56#

57# Permission is hereby granted, free of charge, to any person obtaining

58# a copy of this software and associated documentation files (the

59# "Software"), to deal in the Software without restriction, including

60# without limitation the rights to use, copy, modify, merge, publish,

61# distribute, sublicense, and/or sell copies of the Software, and to

62# permit persons to whom the Software is furnished to do so, subject to

63# the following conditions:

64#

65# The above copyright notice and this permission notice shall be

66# included in all copies or substantial portions of the Software.

67#

68# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

69# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

70# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

71# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

72# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

73# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

74# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

75#

77from functools import cached_property

class __compat__(__config_flags):
    """
    Compatibility switches for pyparsing behavior slated for a future release.

    Setting a flag in this configuration to ``True`` turns the corresponding
    feature on in the current version, so that code can be developed and
    tested against it ahead of time.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every name defined in the class body so far that has no leading underscore
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]

101

102

class __diag__(__config_flags):
    # Module-wide diagnostic switches; every flag below starts out disabled.
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # snapshot of the public flag names defined above, then split by prefix
    _all_names = [flag for flag in locals() if not flag.startswith("_")]
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Switch on every flag whose name starts with ``warn``."""
        for warning_flag in cls._warning_names:
            cls.enable(warning_flag)

123

124

class Diagnostics(Enum):
    """
    Names of the global diagnostic flags (every flag is disabled by default).

    - ``warn_multiple_tokens_in_named_alternation`` - warn when a results name is
      defined on a :class:`MatchFirst` or :class:`Or` expression with one or more
      :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - warn when a results name is
      defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - warn when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - warn when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - warn when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - warn when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - judging by its name, a warning
      about combining the ``<<`` operator with a :class:`MatchFirst` expression
      (the check itself lives elsewhere in the package)
    - ``enable_debug_on_named_expressions`` - auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Individual diagnostics are switched with :class:`enable_diag` and
    :class:`disable_diag`; :class:`enable_all_warnings` turns on all warnings.
    """

    # The member names mirror the flag names looked up by enable_diag/disable_diag.
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

157

158

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member naming the flag to enable
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

164

165

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag.

    :param diag_enum: the :class:`Diagnostics` member naming the flag to disable
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

171

172

def enable_all_warnings() -> None:
    """
    Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).

    Delegates to ``__diag__.enable_all_warnings()``, which enables every flag
    whose name starts with ``warn``.
    """
    __diag__.enable_all_warnings()

178

179

180# hide abstract class

181del __config_flags

182

183

def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether all pyparsing warnings should be switched on.

    The starting answer is the truthiness of ``warn_env_var``.  Each entry of
    ``cmd_line_warn_options`` is then split on ``":"`` into
    ``action:message:category:module:line`` (missing fields read as ``""``)
    and may override the answer, later entries winning over earlier ones:

    - an action that does not start with ``"i"`` enables warnings when it has
      no message, category or module, or when the module is ``"pyparsing"``
    - an action starting with ``"i"`` disables warnings when the module is
      ``"pyparsing"`` or empty
    """
    enabled = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with separators so that the first four fields always exist
        fields = f"{option}::::".split(":")
        action, message, category, module = fields[:4]
        if action.lower().startswith("i"):
            if module in ("pyparsing", ""):
                enabled = False
        elif module == "pyparsing" or not (message or category or module):
            enabled = True
    return enabled

199

200

# At import time, enable every diagnostic warning if the interpreter's warning
# options (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS environment
# variable ask for it.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()

205

206

# build list of single arg builtins, that can be used as parse actions
# (_trim_arity checks membership in this set and, on a hit, calls the builtin
# with the parsed tokens only)
# fmt: off
_single_arg_builtins = {
    sum, len, sorted, reversed, list, tuple, set, any, all, min, max
}
# fmt: on

213

_generatorType = types.GeneratorType
# (new location, tokens) pair, used as the return annotation of parseImpl
ParseImplReturnType = tuple[int, Any]
PostParseReturnType = Union[ParseResults, Sequence[ParseResults]]

# A condition is a callable returning bool that accepts 0 to 3 arguments:
# (), (toks), (loc, toks) or (s, loc, toks).
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: (s, loc, expr, err) -> None
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug "try" action: (instring, loc, expr, cache_hit) -> None
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# debug "match" action: (instring, startloc, endloc, expr, toks, cache_hit) -> None
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# debug "fail" action: (instring, loc, expr, exc, cache_hit) -> None
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

230

231

# Convenience character-set strings.
# ASCII letters, upper case first
alphas: str = string.ascii_uppercase + string.ascii_lowercase
# identifier start / body characters, taken from the Latin1 unicode set
identchars: str = pyparsing_unicode.Latin1.identchars
identbodychars: str = pyparsing_unicode.Latin1.identbodychars
nums: str = "0123456789"
# decimal digits plus hex letters in both cases
hexnums: str = nums + "ABCDEFabcdef"
alphanums: str = alphas + nums
# every character of string.printable that is not whitespace
printables: str = "".join([c for c in string.printable if c not in string.whitespace])

239

240

class _ParseActionIndexError(Exception):
    """
    Internal carrier for an IndexError that escaped from a parse action.

    Wrapping keeps such errors distinguishable from IndexErrors raised inside
    ParserElement parseImpl methods, so the two are not confused.
    """

    def __init__(self, msg: str, exc: BaseException) -> None:
        # keep the wrapped exception and the descriptive text as plain attributes
        self.exc: BaseException = exc
        self.msg: str = msg

251

252

# Module-level caches filled in lazily by the first call to _trim_arity:
# the stack entry captured inside _trim_arity, and the (filename, line number)
# pair synthesized from it.
_trim_arity_call_line: traceback.StackSummary = None  # type: ignore[assignment]
pa_call_line_synth = ()

255

256

def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is always called with the full argument list and
    discovers, on its first successful call, how many leading arguments must
    be dropped for ``func`` to accept the call.  Members of
    ``_single_arg_builtins`` are special-cased and called with the last of
    three arguments only.

    :param func: the callable to adapt
    :param max_limit: maximum number of leading arguments that may be dropped
    :raises _ParseActionIndexError: wraps any IndexError raised during the
        arity-probing call of ``func``
    """
    global _trim_arity_call_line, pa_call_line_synth

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments currently being dropped
    limit = 0
    # becomes True after the first call of func that returns normally
    found_arity = False

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 9
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        if found_arity:
            return func(*args[limit:])
        while 1:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    tb = te.__traceback__
                    # look at the first two traceback entries; the last of them is
                    # compared as (filename, lineno) with the synthesized location
                    # of the probing call above
                    frames = traceback.extract_tb(tb, limit=2)
                    frame_summary = frames[-1]
                    trim_arity_type_error = (
                        [frame_summary[:2]][-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        # the call itself was rejected: drop one more leading
                        # argument and retry, up to max_limit
                        if limit < max_limit:
                            limit += 1
                            continue

                    raise
            except IndexError as ie:
                # wrap IndexErrors inside a _ParseActionIndexError
                raise _ParseActionIndexError(
                    "IndexError raised in parse action", ie
                ).with_traceback(None)
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name
    wrapper.__doc__ = func.__doc__

    return wrapper

319

320

def condition_as_parse_action(
    fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False
) -> ParseAction:
    """
    Turn a predicate returning ``True`` or ``False`` into a parse action.

    Useful wherever a parse action is required and
    :meth:`ParserElement.add_condition` cannot be used (such as when adding a
    condition to an operator level in :class:`infix_notation`).  The returned
    parse action raises when the predicate yields a false value and otherwise
    returns ``None``.

    Optional keyword arguments:

    :param message: define a custom message to be used in the raised exception
    :param fatal: if ``True``, will raise :class:`ParseFatalException`
                  to stop parsing immediately;
                  otherwise will raise :class:`ParseException`

    """
    if message is None:
        message = "failed user-defined condition"
    failure_type = ParseException
    if fatal:
        failure_type = ParseFatalException
    # adapt the predicate so it can always be called as predicate(s, l, t)
    predicate = _trim_arity(fn)

    @wraps(predicate)
    def pa(s, l, t):
        if predicate(s, l, t):
            return
        raise failure_type(s, l, message)

    return pa

348

349

def _default_start_debug_action(
    instring: str, loc: int, expr: ParserElement, cache_hit: bool = False
):
    """
    Default "try" debug action: print which expression is about to be matched.

    Three lines are printed: the expression with its location (offset, line
    number and column), the source line containing ``loc``, and a caret
    right-aligned to the column.  A leading ``*`` marks a cache hit.
    """
    marker = "*" if cache_hit else ""
    header = (
        f"{marker}Match {expr} at loc {loc}"
        f"({lineno(loc, instring)},{col(loc, instring)})"
    )
    source_line = f" {line(loc, instring)}"
    caret_line = f" {'^':>{col(loc, instring)}}"
    print("\n".join((header, source_line, caret_line)))

361

362

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: ParserElement,
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default "match" debug action: print the expression and the tokens it produced.

    The tokens are shown as ``toks.as_list()``; a leading ``*`` marks a cache hit.
    ``instring``, ``startloc`` and ``endloc`` are accepted but not used.
    """
    marker = ""
    if cache_hit:
        marker = "*"
    matched = toks.as_list()
    print(f"{marker}Matched {expr} -> {matched}")

373

374

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: ParserElement,
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default "fail" debug action: print the expression that failed together
    with the type name and text of the exception.

    A leading ``*`` marks a cache hit.  ``instring`` and ``loc`` are accepted
    but not used.
    """
    marker = ""
    if cache_hit:
        marker = "*"
    exc_name = type(exc).__name__
    print(f"{marker}Match {expr} failed, {exc_name} raised: {exc}")

384

385

def null_debug_action(*args):
    """No-op debug action: accepts any positional arguments and does nothing,
    so that debugging output is suppressed during parsing."""
    return None

388

389

390class ParserElement(ABC):

391 """Abstract base level parser element class."""

392

393 DEFAULT_WHITE_CHARS: str = " \n\t\r"

394 verbose_stacktrace: bool = False

395 _literalStringClass: type = None # type: ignore[assignment]

396

397 @staticmethod

398 def set_default_whitespace_chars(chars: str) -> None:

399 r"""

400 Overrides the default whitespace chars

401

402 Example:

403

404 .. doctest::

405

406 # default whitespace chars are space, <TAB> and newline

407 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")

408 ParseResults(['abc', 'def', 'ghi', 'jkl'], {})

409

410 # change to just treat newline as significant

411 >>> ParserElement.set_default_whitespace_chars(" \t")

412 >>> Word(alphas)[1, ...].parse_string("abc def\nghi jkl")

413 ParseResults(['abc', 'def'], {})

414

415 # Reset to default

416 >>> ParserElement.set_default_whitespace_chars(" \n\t\r")

417 """

418 ParserElement.DEFAULT_WHITE_CHARS = chars

419

420 # update whitespace all parse expressions defined in this module

421 for expr in _builtin_exprs:

422 if expr.copyDefaultWhiteChars:

423 expr.whiteChars = set(chars)

424

425 @staticmethod

426 def inline_literals_using(cls: type) -> None:

427 """

428 Set class to be used for inclusion of string literals into a parser.

429

430 Example:

431

432 .. doctest::

433 :options: +NORMALIZE_WHITESPACE

434

435 # default literal class used is Literal

436 >>> integer = Word(nums)

437 >>> date_str = (

438 ... integer("year") + '/'

439 ... + integer("month") + '/'

440 ... + integer("day")

441 ... )

442

443 >>> date_str.parse_string("1999/12/31")

444 ParseResults(['1999', '/', '12', '/', '31'],

445 {'year': '1999', 'month': '12', 'day': '31'})

446

447 # change to Suppress

448 >>> ParserElement.inline_literals_using(Suppress)

449 >>> date_str = (

450 ... integer("year") + '/'

451 ... + integer("month") + '/'

452 ... + integer("day")

453 ... )

454

455 >>> date_str.parse_string("1999/12/31")

456 ParseResults(['1999', '12', '31'],

457 {'year': '1999', 'month': '12', 'day': '31'})

458

459 # Reset

460 >>> ParserElement.inline_literals_using(Literal)

461 """

462 ParserElement._literalStringClass = cls

463

464 @classmethod

465 def using_each(cls, seq, **class_kwargs):

466 """

467 Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq.

468

469 Example:

470

471 .. testcode::

472

473 LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};")

474

475 .. versionadded:: 3.1.0

476 """

477 yield from (cls(obj, **class_kwargs) for obj in seq)

478

    class DebugActions(NamedTuple):
        """Bundle of the three optional debug callbacks held by an element."""

        # called before a match is attempted
        debug_try: typing.Optional[DebugStartAction]
        # called after a successful match
        debug_match: typing.Optional[DebugSuccessAction]
        # called when the match attempt raised an exception
        debug_fail: typing.Optional[DebugExceptionAction]

483

484 def __init__(self, savelist: bool = False) -> None:

485 self.parseAction: list[ParseAction] = list()

486 self.failAction: typing.Optional[ParseFailAction] = None

487 self.customName: str = None # type: ignore[assignment]

488 self._defaultName: typing.Optional[str] = None

489 self.resultsName: str = None # type: ignore[assignment]

490 self.saveAsList = savelist

491 self.skipWhitespace = True

492 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)

493 self.copyDefaultWhiteChars = True

494 # used when checking for left-recursion

495 self._may_return_empty = False

496 self.keepTabs = False

497 self.ignoreExprs: list[ParserElement] = list()

498 self.debug = False

499 self.streamlined = False

500 # optimize exception handling for subclasses that don't advance parse index

501 self.mayIndexError = True

502 self.errmsg: Union[str, None] = ""

503 # mark results names as modal (report only last) or cumulative (list all)

504 self.modalResults = True

505 # custom debug actions

506 self.debugActions = self.DebugActions(None, None, None)

507 # avoid redundant calls to preParse

508 self.callPreparse = True

509 self.callDuringTry = False

510 self.suppress_warnings_: list[Diagnostics] = []

511 self.show_in_diagram = True

512

    @property
    def mayReturnEmpty(self):
        """Read/write alias for the ``_may_return_empty`` attribute."""
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        # store under the underscore name that the getter reads
        self._may_return_empty = value

520

521 def suppress_warning(self, warning_type: Diagnostics) -> ParserElement:

522 """

523 Suppress warnings emitted for a particular diagnostic on this expression.

524

525 Example:

526

527 .. doctest::

528

529 >>> label = pp.Word(pp.alphas)

530

531 # Normally using an empty Forward in a grammar

532 # would print a warning, but we can suppress that

533 >>> base = pp.Forward().suppress_warning(

534 ... pp.Diagnostics.warn_on_parse_using_empty_Forward)

535

536 >>> grammar = base | label

537 >>> print(grammar.parse_string("x"))

538 ['x']

539 """

540 self.suppress_warnings_.append(warning_type)

541 return self

542

543 def visit_all(self):

544 """General-purpose method to yield all expressions and sub-expressions

545 in a grammar. Typically just for internal use.

546 """

547 to_visit = deque([self])

548 seen = set()

549 while to_visit:

550 cur = to_visit.popleft()

551

552 # guard against looping forever through recursive grammars

553 if cur in seen:

554 continue

555 seen.add(cur)

556

557 to_visit.extend(cur.recurse())

558 yield cur

559

560 def copy(self) -> ParserElement:

561 """

562 Make a copy of this :class:`ParserElement`. Useful for defining

563 different parse actions for the same parsing pattern, using copies of

564 the original parse element.

565

566 Example:

567

568 .. testcode::

569

570 integer = Word(nums).set_parse_action(

571 lambda toks: int(toks[0]))

572 integerK = integer.copy().add_parse_action(

573 lambda toks: toks[0] * 1024) + Suppress("K")

574 integerM = integer.copy().add_parse_action(

575 lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

576

577 print(

578 (integerK | integerM | integer)[1, ...].parse_string(

579 "5K 100 640K 256M")

580 )

581

582 prints:

583

584 .. testoutput::

585

586 [5120, 100, 655360, 268435456]

587

588 Equivalent form of ``expr.copy()`` is just ``expr()``:

589

590 .. testcode::

591

592 integerM = integer().add_parse_action(

593 lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

594 """

595 cpy = copy.copy(self)

596 cpy.parseAction = self.parseAction[:]

597 cpy.ignoreExprs = self.ignoreExprs[:]

598 if self.copyDefaultWhiteChars:

599 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)

600 return cpy

601

602 def set_results_name(

603 self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False

604 ) -> ParserElement:

605 """

606 Define name for referencing matching tokens as a nested attribute

607 of the returned parse results.

608

609 Normally, results names are assigned as you would assign keys in a dict:

610 any existing value is overwritten by later values. If it is necessary to

611 keep all values captured for a particular results name, call ``set_results_name``

612 with ``list_all_matches`` = True.

613

614 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;

615 this is so that the client can define a basic element, such as an

616 integer, and reference it in multiple places with different names.

617

618 You can also set results names using the abbreviated syntax,

619 ``expr("name")`` in place of ``expr.set_results_name("name")``

620 - see :meth:`__call__`. If ``list_all_matches`` is required, use

621 ``expr("name*")``.

622

623 Example:

624

625 .. testcode::

626

627 integer = Word(nums)

628 date_str = (integer.set_results_name("year") + '/'

629 + integer.set_results_name("month") + '/'

630 + integer.set_results_name("day"))

631

632 # equivalent form:

633 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

634 """

635 listAllMatches = listAllMatches or list_all_matches

636 return self._setResultsName(name, listAllMatches)

637

638 def _setResultsName(self, name, list_all_matches=False) -> ParserElement:

639 if name is None:

640 return self

641 newself = self.copy()

642 if name.endswith("*"):

643 name = name[:-1]

644 list_all_matches = True

645 newself.resultsName = name

646 newself.modalResults = not list_all_matches

647 return newself

648

649 def set_break(self, break_flag: bool = True) -> ParserElement:

650 """

651 Method to invoke the Python pdb debugger when this element is

652 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to

653 disable.

654 """

655 if break_flag:

656 _parseMethod = self._parse

657

658 def breaker(instring, loc, do_actions=True, callPreParse=True):

659 # this call to breakpoint() is intentional, not a checkin error

660 breakpoint()

661 return _parseMethod(instring, loc, do_actions, callPreParse)

662

663 breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined]

664 self._parse = breaker # type: ignore [method-assign]

665 elif hasattr(self._parse, "_originalParseMethod"):

666 self._parse = self._parse._originalParseMethod # type: ignore [method-assign]

667 return self

668

669 def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:

670 """

671 Define one or more actions to perform when successfully matching parse element definition.

672

673 Parse actions can be called to perform data conversions, do extra validation,

674 update external data structures, or enhance or replace the parsed tokens.

675 Each parse action ``fn`` is a callable method with 0-3 arguments, called as

676 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

677

678 - ``s`` = the original string being parsed (see note below)

679 - ``loc`` = the location of the matching substring

680 - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object

681

682 The parsed tokens are passed to the parse action as ParseResults. They can be

683 modified in place using list-style append, extend, and pop operations to update

684 the parsed list elements; and with dictionary-style item set and del operations

685 to add, update, or remove any named results. If the tokens are modified in place,

686 it is not necessary to return them with a return statement.

687

688 Parse actions can also completely replace the given tokens, with another ``ParseResults``

689 object, or with some entirely different object (common for parse actions that perform data

690 conversions). A convenient way to build a new parse result is to define the values

691 using a dict, and then create the return value using :class:`ParseResults.from_dict`.

692

693 If None is passed as the ``fn`` parse action, all previously added parse actions for this

694 expression are cleared.

695

696 Optional keyword arguments:

697

698 :param call_during_try: (default= ``False``) indicate if parse action

699 should be run during lookaheads and alternate

700 testing. For parse actions that have side

701 effects, it is important to only call the parse

702 action once it is determined that it is being

703 called as part of a successful parse.

704 For parse actions that perform additional

705 validation, then ``call_during_try`` should

706 be passed as True, so that the validation code

707 is included in the preliminary "try" parses.

708

709 .. Note::

710 The default parsing behavior is to expand tabs in the input string

711 before starting the parsing process.

712 See :meth:`parse_string` for more information on parsing strings

713 containing ``<TAB>`` s, and suggested methods to maintain a

714 consistent view of the parsed string, the parse location, and

715 line and column positions within the parsed string.

716

717 Example: Parse dates in the form ``YYYY/MM/DD``

718 -----------------------------------------------

719

720 Setup code:

721

722 .. testcode::

723

724 def convert_to_int(toks):

725 '''a parse action to convert toks from str to int

726 at parse time'''

727 return int(toks[0])

728

729 def is_valid_date(instring, loc, toks):

730 '''a parse action to verify that the date is a valid date'''

731 from datetime import date

732 year, month, day = toks[::2]

733 try:

734 date(year, month, day)

735 except ValueError:

736 raise ParseException(instring, loc, "invalid date given")

737

738 integer = Word(nums)

739 date_str = integer + '/' + integer + '/' + integer

740

741 # add parse actions

742 integer.set_parse_action(convert_to_int)

743 date_str.set_parse_action(is_valid_date)

744

745 Successful parse - note that integer fields are converted to ints:

746

747 .. testcode::

748

749 print(date_str.parse_string("1999/12/31"))

750

751 prints:

752

753 .. testoutput::

754

755 [1999, '/', 12, '/', 31]

756

757 Failure - invalid date:

758

759 .. testcode::

760

761 date_str.parse_string("1999/13/31")

762

763 prints:

764

765 .. testoutput::

766

767 Traceback (most recent call last):

768 ParseException: invalid date given, found '1999' ...

769 """

770 if list(fns) == [None]:

771 self.parseAction.clear()

772 return self

773

774 if not all(callable(fn) for fn in fns):

775 raise TypeError("parse actions must be callable")

776 self.parseAction[:] = [_trim_arity(fn) for fn in fns]

777 self.callDuringTry = kwargs.get(

778 "call_during_try", kwargs.get("callDuringTry", False)

779 )

780

781 return self

782

783 def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement:

784 """

785 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.

786

787 See examples in :class:`copy`.

788 """

789 self.parseAction += [_trim_arity(fn) for fn in fns]

790 self.callDuringTry = self.callDuringTry or kwargs.get(

791 "call_during_try", kwargs.get("callDuringTry", False)

792 )

793 return self

794

795 def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement:

796 """Add a boolean predicate function to expression's list of parse actions. See

797 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,

798 functions passed to ``add_condition`` need to return boolean success/fail of the condition.

799

800 Optional keyword arguments:

801

802 - ``message`` = define a custom message to be used in the raised exception

803 - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise

804 ParseException

805 - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls,

806 default=False

807

808 Example:

809

810 .. doctest::

811 :options: +NORMALIZE_WHITESPACE

812

813 >>> integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))

814 >>> year_int = integer.copy().add_condition(

815 ... lambda toks: toks[0] >= 2000,

816 ... message="Only support years 2000 and later")

817 >>> date_str = year_int + '/' + integer + '/' + integer

818

819 >>> result = date_str.parse_string("1999/12/31")

820 Traceback (most recent call last):

821 ParseException: Only support years 2000 and later...

822 """

823 for fn in fns:

824 self.parseAction.append(

825 condition_as_parse_action(

826 fn,

827 message=str(kwargs.get("message")),

828 fatal=bool(kwargs.get("fatal", False)),

829 )

830 )

831

832 self.callDuringTry = self.callDuringTry or kwargs.get(

833 "call_during_try", kwargs.get("callDuringTry", False)

834 )

835 return self

836

837 def set_fail_action(self, fn: ParseFailAction) -> ParserElement:

838 """

839 Define action to perform if parsing fails at this expression.

840 Fail acton fn is a callable function that takes the arguments

841 ``fn(s, loc, expr, err)`` where:

842

843 - ``s`` = string being parsed

844 - ``loc`` = location where expression match was attempted and failed

845 - ``expr`` = the parse expression that failed

846 - ``err`` = the exception thrown

847

848 The function returns no value. It may throw :class:`ParseFatalException`

849 if it is desired to stop parsing immediately."""

850 self.failAction = fn

851 return self

852

853 def _skipIgnorables(self, instring: str, loc: int) -> int:

854 if not self.ignoreExprs:

855 return loc

856 exprsFound = True

857 ignore_expr_fns = [e._parse for e in self.ignoreExprs]

858 last_loc = loc

859 while exprsFound:

860 exprsFound = False

861 for ignore_fn in ignore_expr_fns:

862 try:

863 while 1:

864 loc, dummy = ignore_fn(instring, loc)

865 exprsFound = True

866 except ParseException:

867 pass

868 # check if all ignore exprs matched but didn't actually advance the parse location

869 if loc == last_loc:

870 break

871 last_loc = loc

872 return loc

873

874 def preParse(self, instring: str, loc: int) -> int:

875 if self.ignoreExprs:

876 loc = self._skipIgnorables(instring, loc)

877

878 if self.skipWhitespace:

879 instrlen = len(instring)

880 white_chars = self.whiteChars

881 while loc < instrlen and instring[loc] in white_chars:

882 loc += 1

883

884 return loc

885

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Base implementation: consume nothing and return ``loc`` together
        with an empty token list."""
        return loc, []

888

    def postParse(self, instring, loc, tokenlist):
        """Base implementation: return ``tokenlist`` unchanged."""
        return tokenlist

891

892 # @profile

def _parseNoCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Match this expression at ``loc`` without consulting any memoization cache.

    Steps: optionally run ``preParse``, call ``parseImpl``, run ``postParse``,
    wrap the tokens in a :class:`ParseResults`, then run the parse actions
    (when ``do_actions`` is true or ``self.callDuringTry`` is set).

    The matching and parse-action code is deliberately written twice: a
    branch that reports to debug actions / ``failAction``, and a plain
    branch without that bookkeeping.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param do_actions: whether parse actions should be run
    :param callPreParse: whether ``preParse`` may be called (it is only
        called when ``self.callPreparse`` is also true)
    :returns: ``(end_loc, results)``
    :raises ParseException: when ``parseImpl`` raises ``IndexError`` on the
        guarded path, or when a parse action raises ``IndexError``
    """
    debugging = self.debug  # and do_actions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
        # Bind tokens_start up front: if preParse itself raises, the handler
        # below still needs a location to report, and must not replace the
        # real error with an UnboundLocalError.
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
        except Exception as err:
            # print("Exception raised:", err)
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            # running off the end of the input is reported as a parse failure
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, do_actions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, do_actions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (do_actions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns something new replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                # print "Exception raised in user parse action:", err
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)  # type: ignore [call-arg, arg-type]
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a parse action that returns something new replaces the results
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        # print("Matched", self, "->", ret_tokens.as_list())
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

993

def try_parse(
    self,
    instring: str,
    loc: int,
    *,
    raise_fatal: bool = False,
    do_actions: bool = False,
) -> int:
    """
    Attempt a match at ``loc`` and report only where it would end.

    :param instring: the string being parsed
    :param loc: location at which to attempt the match
    :param raise_fatal: if true, a ``ParseFatalException`` is re-raised
        as-is; otherwise it is converted to a plain ``ParseException``
    :param do_actions: forwarded to ``self._parse``
    :returns: the end location of the match
    """
    try:
        outcome = self._parse(instring, loc, do_actions=do_actions)
        return outcome[0]
    except ParseFatalException:
        if not raise_fatal:
            raise ParseException(instring, loc, self.errmsg, self)
        raise

1008

def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool:
    """
    Report whether this expression matches at ``loc``.

    :returns: ``False`` if ``try_parse`` raises ``ParseException`` or
        ``IndexError``, ``True`` otherwise
    """
    try:
        self.try_parse(instring, loc, do_actions=do_actions)
    except (ParseException, IndexError):
        return False
    return True

1016

# cache for left-recursion in Forward references
# NOTE(review): recursion_lock presumably guards recursion_memos; its users
# are not visible in this part of the file - confirm against Forward.
recursion_lock = RLock()
# Starts out as a plain dict; enable_left_recursion() replaces it with a memo
# object and reset_cache() clears it.
recursion_memos: collections.abc.MutableMapping[
    tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]]
] = {}

1022

class _CacheType(typing.Protocol):
    """
    Structural interface of the caches used for packrat and left-recursion
    memoization of results and exceptions.
    """

    # value that get() hands back when a key is absent
    not_in_cache: bool

    def get(self, *args) -> typing.Any:
        """Look up a cached entry."""

    def set(self, *args) -> None:
        """Store an entry in the cache."""

    def clear(self) -> None:
        """Drop all cached entries."""

1036

class NullCache(dict):
    """
    Placeholder cache used to initialize the ``packrat_cache`` class variable.

    Every operation is a no-op. Once ``enable_packrat()`` is called, this
    object is swapped out for a real ``_CacheType`` implementation.
    """

    not_in_cache: bool = True

    def get(self, *args) -> typing.Any:
        """Ignore the lookup; always gives back ``None``."""
        return None

    def set(self, *args) -> None:
        """Ignore the request; nothing is stored."""
        return None

    def clear(self) -> None:
        """Do nothing (this overrides ``dict.clear``)."""
        return None

1051

# class-level argument cache for optimizing repeated calls when backtracking
# through recursive expressions
# (a no-op NullCache until enable_packrat() installs a real cache)
packrat_cache: _CacheType = NullCache()
# held by _parseCache, reset_cache and the enable/disable methods
packrat_cache_lock = RLock()
# [hits, misses], indexed by the HIT/MISS constants in _parseCache
packrat_cache_stats = [0, 0]

1057

1058 # this method gets repeatedly called during backtracking with the same arguments -

1059 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

def _parseCache(
    self, instring, loc, do_actions=True, callPreParse=True
) -> tuple[int, ParseResults]:
    """
    Memoizing front end for ``_parseNoCache`` (packrat parsing).

    The cache key is ``(self, instring, loc, callPreParse, do_actions)``.
    On a miss the real parse is run and either its result or a traceback-free
    copy of the raised ``ParseBaseException`` is stored. On a hit the stored
    exception is re-raised, or a copy of the stored results is returned.

    Debug actions are notified on cache hits with ``cache_hit=True``; actions
    that do not accept that keyword are tolerated (``TypeError`` is ignored).

    :returns: ``(end_loc, results)``
    :raises ParseBaseException: when the (possibly cached) parse failed
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, do_actions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, do_actions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # stored layout: (end location, results copy, start location)
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value

        ParserElement.packrat_cache_stats[HIT] += 1
        if self.debug and self.debugActions.debug_try:
            try:
                self.debugActions.debug_try(instring, loc, self, cache_hit=True)  # type: ignore [call-arg]
            except TypeError:
                pass
        if isinstance(value, Exception):
            if self.debug and self.debugActions.debug_fail:
                try:
                    self.debugActions.debug_fail(
                        instring, loc, self, value, cache_hit=True  # type: ignore [call-arg]
                    )
                except TypeError:
                    pass
            raise value

        value = cast(tuple[int, ParseResults, int], value)
        # unpack in the stored order; hand out a copy so callers cannot
        # mutate the cached results
        end_loc, result, start_loc = value[0], value[1].copy(), value[2]
        if self.debug and self.debugActions.debug_match:
            try:
                # start before end, matching the argument order that
                # _parseNoCache uses for debug_match
                self.debugActions.debug_match(
                    instring, start_loc, end_loc, self, result, cache_hit=True  # type: ignore [call-arg]
                )
            except TypeError:
                pass

        return end_loc, result

1107

# Active parse entry point: the uncached implementation by default.
# enable_packrat() rebinds it to _parseCache and disable_memoization()
# restores it.
_parse = _parseNoCache

1109

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its hit/miss counters, and clear the
    left-recursion memos, all while holding the packrat cache lock.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.packrat_cache.clear()
        counters = ParserElement.packrat_cache_stats
        # zero in place so existing references to the list see the reset
        counters[:] = [0 for _ in counters]
        ParserElement.recursion_memos.clear()

1118

# class attributes to keep caching status
# (set by enable_packrat / enable_left_recursion, cleared by disable_memoization;
# the two modes are mutually exclusive)
_packratEnabled = False
_left_recursion_enabled = False

1122

@staticmethod
def disable_memoization() -> None:
    """
    Turn off Packrat or Left Recursion parsing, whichever is active, and
    discard their memoized data.

    Calling this when neither mode is enabled is harmless, so it can be used
    before activating Packrat or Left Recursion to clear any previous
    settings.
    """
    with ParserElement.packrat_cache_lock:
        ParserElement.reset_cache()
        ParserElement._left_recursion_enabled = False
        ParserElement._packratEnabled = False
        # go back to the uncached parse implementation
        ParserElement._parse = ParserElement._parseNoCache

1137

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Turn on "bounded recursion" parsing, which supports both direct and
    indirect left-recursion. While parsing, left-recursive :class:`Forward`
    elements are matched repeatedly with a fixed recursion depth that is
    raised step by step until the longest match is found.

    Example:

    .. testcode::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)

        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3+4"))

    prints:

    .. testoutput::

        ['1', '+', '2', '+', '3', '+', '4']

    Recursion search naturally memoizes matches of ``Forward`` elements and
    may therefore skip re-running parse actions during backtracking. Programs
    whose parse actions rely on a strict ordering of side-effects may break.

    Parameters:

    - ``cache_size_limit`` - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements.

    Bounded Recursion parsing is similar to, but not the same as, Packrat
    parsing, so the two cannot be combined. Pass ``force=True`` to disable
    any previous, conflicting settings.

    :raises RuntimeError: if Packrat is enabled and ``force`` is false
    :raises NotImplementedError: if ``cache_size_limit`` is zero or negative
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._packratEnabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        if cache_size_limit is None:
            memo_store = _UnboundedMemo()
        elif cache_size_limit > 0:
            memo_store = _LRUMemo(capacity=cache_size_limit)
        else:
            raise NotImplementedError(f"Memo size of {cache_size_limit}")

        ParserElement.recursion_memos = memo_store  # type: ignore[assignment]
        ParserElement._left_recursion_enabled = True

1195

@staticmethod
def enable_packrat(
    cache_size_limit: Union[int, None] = 128, *, force: bool = False
) -> None:
    """
    Turn on "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (common in complex
    grammars) can then return a cached value immediately instead of
    re-executing parsing/validating code. Both successful results and
    parsing exceptions are memoized.

    Parameters:

    - ``cache_size_limit`` - (default= ``128``) - if an integer value is
      provided, it limits the size of the packrat cache; if None is passed,
      the cache size is unbounded; if 0 is passed, the cache is effectively
      disabled.

    This speedup may break existing programs that use parse actions with
    side-effects. For this reason, packrat parsing is disabled when you
    first import pyparsing. To activate the packrat feature, your program
    must call the class method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after importing
    pyparsing.

    .. Can't really be doctested, alas

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing is similar to, but not the same as, Bounded Recursion
    parsing, so the two cannot be combined. Pass ``force=True`` to disable
    any previous, conflicting settings.

    :raises RuntimeError: if Bounded Recursion is enabled and ``force`` is false
    """
    with ParserElement.packrat_cache_lock:
        if force:
            ParserElement.disable_memoization()
        elif ParserElement._left_recursion_enabled:
            raise RuntimeError("Packrat and Bounded Recursion are not compatible")

        # already active: keep the existing cache untouched
        if ParserElement._packratEnabled:
            return

        ParserElement._packratEnabled = True
        new_cache = (
            _UnboundedCache()
            if cache_size_limit is None
            else _FifoCache(cache_size_limit)
        )
        ParserElement.packrat_cache = new_cache
        ParserElement._parse = ParserElement._parseCache

1247

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This is intended
    as the primary interface for client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    If the input string must match the entire grammar, set ``parse_all`` to
    ``True``. This is equivalent to ending the grammar with
    :class:`StringEnd` ().

    To report proper column numbers, ``parse_string`` works on a copy of the
    input string in which all tabs are converted to spaces (8 spaces per
    tab, the default of ``string.expandtabs``). If the input contains tabs
    and the grammar uses parse actions that index into the string with the
    ``loc`` argument, keep a consistent view of the input by doing one of:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - defining your parse action with the full ``(s,loc,toks)`` signature and reading the input through the
      parse action's ``s`` argument, or
    - explicitly expanding the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa')
        >>> print(res)
        ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract
    class. See the subclasses for more examples.

    An exception is raised if ``parse_all`` is set and ``instring`` does not
    match the whole grammar.

    .. doctest::

        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
        Traceback (most recent call last):
        ParseException: Expected end of text, found 'b' ...
    """
    must_match_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        end_loc, results = self._parse(instring, 0)
        if must_match_all:
            # only skippable text may remain after the match
            end_loc = self.preParse(instring, end_loc)
            end_check = Empty() + StringEnd().set_debug(False)
            end_check._parse(instring, end_loc)
    except _ParseActionIndexError as pa_exc:
        raise pa_exc.exc
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    else:
        return results

1321

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    always_skip_whitespace=True,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match yields the
    matching tokens, start location, and end location. The optional
    ``max_matches`` argument clips scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    :param instring: the string to scan
    :param max_matches: stop after this many matches
    :param overlap: if true, resume scanning inside the previous match
        rather than after it
    :param always_skip_whitespace: if true (the default), skipping between
        match attempts is done by a throwaway ``Empty`` element that shares
        this expression's ignore expressions and whitespace characters;
        otherwise this expression's own ``preParse`` is used
    :param debug: if true, print a dict with each match's tokens, start
        and end before yielding it
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of
        the two values is used

    Example:

    .. testcode::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints:

    .. testoutput::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    if always_skip_whitespace:
        preparser = Empty()
        preparser.ignoreExprs = self.ignoreExprs
        preparser.whiteChars = self.whiteChars
        preparseFn = preparser.preParse
    else:
        preparseFn = self.preParse
    parseFn = self._parse
    # PEP8 name, as used by parse_string (resetCache is the legacy alias)
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc: int = preparseFn(instring, loc)
                nextLoc: int
                tokens: ParseResults
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character further on
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # named to stay distinct from nextLoc, the end of
                        # the match just reported
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match that did not advance; step forward to avoid looping
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1423

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string` that rewrites matching text using the
    tokens returned from a parse action. To use ``transform_string``, define
    a grammar and attach a parse action that modifies the returned token
    list. Calling ``transform_string()`` on a target string then scans for
    matches and replaces each matched span according to the parse action.
    The transformed string is returned.

    Note that this sets ``self.keepTabs = True`` on the expression.

    Example:

    .. testcode::

        quote = '''now is the winter of our discontent,
        made glorious summer by this sun of york.'''

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string(quote))

    prints:

    .. testoutput::

        Now Is The Winter Of Our Discontent,
        Made Glorious Summer By This Sun Of York.
    """
    pieces: list[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # copy the unmatched text between the previous match and this one
            if start > prev_end:
                pieces.append(instring[prev_end:start])
            prev_end = end

            if not toks:
                continue

            if isinstance(toks, ParseResults):
                pieces.extend(toks.as_list())
            elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                pieces.extend(toks)
            else:
                pieces.append(toks)

        pieces.append(instring[prev_end:])
        kept = list(filter(None, pieces))
        return "".join(map(str, _flatten(kept)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1482

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, giving direct access to the
    tokens of every match of this expression. The optional ``max_matches``
    argument clips searching after 'n' matches are found.

    Example:

    .. testcode::

        quote = '''More than Iron, more than Lead,
        more than Gold I need Electricity'''

        # a capitalized word starts with an uppercase letter,
        # followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string(quote))

        # the sum() builtin can be used to merge results
        # into a single ParseResults object
        print(sum(cap_word.search_string(quote)))

    prints:

    .. testoutput::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    maxMatches = min(maxMatches, max_matches)
    try:
        scanner = self.scan_string(
            instring, maxMatches, always_skip_whitespace=False, debug=debug
        )
        # keep only the tokens; start and end locations are dropped
        found = [match[0] for match in scanner]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise

        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1536

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator method that splits a string, using this expression as the
    separator. The optional ``maxsplit`` argument limits the number of
    splits; the optional ``include_separators`` argument (default=
    ``False``) controls whether the separating text is included in the
    split results. ``includeSeparators`` is the pre-PEP8 spelling.

    Example:

    .. testcode::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split(
            "This, this?, this sentence, is badly punctuated!")))

    prints:

    .. testoutput::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the last separator (possibly the empty string)
    yield instring[cursor:]

1573

def __add__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator - returns :class:`And`. Strings added
    to a :class:`ParserElement` are converted to :class:`Literal` elements
    by default.

    Example:

    .. testcode::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints:

    .. testoutput::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:

    .. testcode::

        Literal('start') + ... + Literal('end')

    is equivalent to:

    .. testcode::

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    return NotImplemented

1617

def __radd__(self, other) -> ParserElement:
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`

    ``... + expr`` produces a :class:`SkipTo` for ``expr`` (results name
    ``"_skipped*"``) followed by ``expr`` itself.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs + self
    return NotImplemented

1630

def __sub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        # the error stop sits between the two operands
        return self + And._ErrorStop() + rhs
    return NotImplemented

1640

def __rsub__(self, other) -> ParserElement:
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs - self
    return NotImplemented

1650

def __mul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    :raises ValueError: for a negative count, or a tuple whose second value
        is smaller than its first
    """
    # normalize the Ellipsis shorthands to tuple form
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        required, optional = other, 0
    elif isinstance(other, tuple):
        bounds = tuple(None if item is Ellipsis else item for item in other)
        low, high = (bounds + (None, None))[:2]
        if low is None:
            low = 0
        if not isinstance(low, int):
            return NotImplemented
        if high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        if not isinstance(high, int):
            return NotImplemented
        required, optional = low, high - low
    else:
        return NotImplemented

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == 0 and optional == 0:
        return And([])

    if not optional:
        return self if required == 1 else And([self] * required)

    def nested_optional(count):
        # Opt(self + Opt(self + ... Opt(self))), nested `count` levels deep
        if count > 1:
            return Opt(self + nested_optional(count - 1))
        return Opt(self)

    if not required:
        return nested_optional(optional)

    head = self if required == 1 else And([self] * required)
    return head + nested_optional(optional)

1730

def __rmul__(self, other) -> ParserElement:
    """
    Implementation of ``*`` operator when the left operand is not a
    :class:`ParserElement`; delegates to ``__mul__``.
    """
    product = self.__mul__(other)
    return product

1733

def __or__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`

    .. versionchanged:: 3.1.0
       Support ``expr | ""`` as a synonym for ``Optional(expr)``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    alternative = other
    if isinstance(alternative, str_type):
        # `expr | ""` is equivalent to `Opt(expr)`
        if alternative == "":
            return Opt(self)
        alternative = self._literalStringClass(alternative)

    if isinstance(alternative, ParserElement):
        return MatchFirst([self, alternative])
    return NotImplemented

1752

def __ror__(self, other) -> ParserElement:
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs | self
    return NotImplemented

1762

def __xor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator - returns :class:`Or`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    return NotImplemented

1772

def __rxor__(self, other) -> ParserElement:
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    return NotImplemented

1782

def __and__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator - returns :class:`Each`
    """
    rhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    return NotImplemented

1792

def __rand__(self, other) -> ParserElement:
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`
    """
    lhs = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(lhs, ParserElement):
        return lhs & self
    return NotImplemented

1802

def __invert__(self) -> ParserElement:
    """
    Implementation of ``~`` operator - returns :class:`NotAny`
    """
    negated = NotAny(self)
    return negated

1808

# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence
# (with __iter__ set to None, iter(expr) raises TypeError instead of falling
# back to calling __getitem__ with 0, 1, 2, ...)
__iter__ = None

1812

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr`` elements exist in the input stream. If this
    behavior is desired, then write ``expr[..., n] + ~expr``.

    For repetition with a stop_on expression, use slice notation:

    - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)``
    - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)``

    .. versionchanged:: 3.1.0
       Support for slice notation.

    :raises TypeError: if more than 2 index arguments are given
    """

    has_stop = False
    stop_expr = NoMatch()
    if isinstance(key, slice):
        # expr[count : stop] - a missing count means "any number"
        key, stop_expr = key.start, key.stop
        if key is None:
            key = ...
        has_stop = True
    elif isinstance(key, tuple) and isinstance(key[-1], slice):
        # expr[min, max : stop]
        key, stop_expr = (key[0], key[1].start), key[1].stop
        has_stop = True

    # convert single arg keys to tuples
    if isinstance(key, str_type):
        key = (key,)
    try:
        iter(key)
    except TypeError:
        key = (key, key)

    arg_count = len(key)
    if arg_count > 2:
        overflow = f"... [{arg_count}]" if arg_count > 5 else ""
        raise TypeError(
            f"only 1 or 2 index arguments supported ({key[:5]}{overflow})"
        )

    # clip to 2 elements
    repeated = self * tuple(key[:2])
    repeated = typing.cast(_MultipleMatch, repeated)

    if has_stop:
        repeated.stopOn(stop_expr)

    return repeated

1874

def __call__(self, name: typing.Optional[str] = None) -> ParserElement:
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    When ``name`` ends with a ``'*'`` character, ``list_all_matches`` is
    passed as ``True`` instead.

    Calling the expression with no ``name`` (or ``None``) is the same as
    calling :class:`copy`.

    Example:

    .. testcode::

       # the two definitions below are equivalent
       userdata = (
           Word(alphas).set_results_name("name")
           + Word(nums + "-").set_results_name("socsecno")
       )

       userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    # Only None means "no name given"; any other value (even "") is
    # forwarded as a results name.
    if name is None:
        return self.copy()

    return self._setResultsName(name)

1900

def suppress(self) -> ParserElement:
    """
    Return this :class:`ParserElement` wrapped in a :class:`Suppress`, so that
    its output is suppressed; handy for keeping punctuation out of the
    returned results.
    """
    suppressed = Suppress(self)
    return suppressed

1907

def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn on skipping of whitespace before the characters of this
    :class:`ParserElement`'s defined pattern are matched.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any).
       This implementation has no child elements and does not consult the flag.
    :returns: this expression, to allow call chaining
    """
    skip_leading_whitespace = True
    self.skipWhitespace = skip_leading_whitespace
    return self

1917

def leave_whitespace(self, recursive: bool = True) -> ParserElement:
    """
    Turn off skipping of whitespace before the characters of this
    :class:`ParserElement`'s defined pattern are matched. Normally this is
    only used internally by the pyparsing module, but some
    whitespace-sensitive grammars may need it.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any).
       This implementation has no child elements and does not consult the flag.
    :returns: this expression, to allow call chaining
    """
    skip_leading_whitespace = False
    self.skipWhitespace = skip_leading_whitespace
    return self

1928

def set_whitespace_chars(
    self, chars: Union[set[str], str], copy_defaults: bool = False
) -> ParserElement:
    """
    Overrides the default whitespace chars.

    :param chars: the characters to treat as whitespace; stored as a ``set``
    :param copy_defaults: value stored in ``copyDefaultWhiteChars``
    :returns: this expression, to allow call chaining
    """
    # Supplying explicit whitespace characters also switches skipping on.
    self.skipWhitespace = True
    whitespace = set(chars)
    self.whiteChars = whitespace
    self.copyDefaultWhiteChars = copy_defaults
    return self

1939

def parse_with_tabs(self) -> ParserElement:
    """
    Overrides the default behavior of expanding ``<TAB>`` s to spaces before
    the input string is parsed, by setting the ``keepTabs`` flag.
    Call this before ``parse_string`` whenever the grammar contains elements
    that match ``<TAB>`` characters.

    :returns: this expression, to allow call chaining
    """
    keep_tabs = True
    self.keepTabs = keep_tabs
    return self

1948

def ignore(self, other: ParserElement) -> ParserElement:
    """
    Register an expression to be ignored (e.g., comments) during pattern
    matching. May be called more than once to register several comment or
    other ignorable patterns.

    Example:

    .. doctest::

       >>> patt = Word(alphas)[...]
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj']

       >>> patt = Word(alphas)[...].ignore(c_style_comment)
       >>> print(patt.parse_string('ablaj /* comment */ lskjd'))
       ['ablaj', 'lskjd']
    """
    # Plain strings are promoted to a Suppress expression first.
    ignorable = Suppress(other) if isinstance(other, str_type) else other

    if not isinstance(ignorable, Suppress):
        # Any other expression is copied and wrapped so its tokens are dropped.
        self.ignoreExprs.append(Suppress(ignorable.copy()))
    elif ignorable not in self.ignoreExprs:
        # A Suppress is stored as-is, but only once.
        self.ignoreExprs.append(ignorable)
    return self

1976

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> ParserElement:
    """
    Customize display of debugging messages while doing pattern matching.
    Any action given as a falsy value is replaced by the corresponding
    default debug action, and debugging is switched on for this expression.

    :param start_action: method to be called when an expression is about to be parsed;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             cache_hit: bool)

    :param success_action: method to be called when an expression has successfully parsed;
       should have the signature::

          fn(input_string: str,
             start_location: int,
             end_location: int,
             expression: ParserElement,
             parsed_tokens: ParseResults,
             cache_hit: bool)

    :param exception_action: method to be called when expression fails to parse;
       should have the signature::

          fn(input_string: str,
             location: int,
             expression: ParserElement,
             exception: Exception,
             cache_hit: bool)

    :returns: this expression, to allow call chaining
    """
    # ``or`` keeps the caller's callable when it is truthy and only then
    # falls back to the module default.
    on_start = start_action or _default_start_debug_action  # type: ignore[truthy-function]
    on_success = success_action or _default_success_debug_action  # type: ignore[truthy-function]
    on_exception = exception_action or _default_exception_debug_action  # type: ignore[truthy-function]

    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = True
    return self

2020

def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement:
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.
    Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions.

    Example:

    .. testcode::

       wd = Word(alphas).set_name("alphaword")
       integer = Word(nums).set_name("numword")
       term = wd | integer

       # turn on debugging for wd
       wd.set_debug()

       term[1, ...].parse_string("abc 123 xyz 890")

    With the default debug actions, each attempt to match ``wd`` first shows
    the message ``"Match <exprname> at loc <n>(<line>,<col>)"`` followed by the
    input line and a ``^`` marker, for instance::

       Match alphaword at loc 0(1,1)
         abc 123 xyz 890
         ^
       Matched alphaword -> ['abc']
       Match alphaword at loc 4(1,5)
         abc 123 xyz 890
             ^
       Match alphaword failed, ParseException raised: Expected alphaword, ...

    A ``"Matched"`` message follows when the parse succeeds, otherwise an
    ``"Exception raised"`` message is shown. Custom debug actions can be
    specified using :meth:`set_debug_actions`. Also note the use of
    :meth:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for
    instance, the default name created for the :class:`Word` expression
    without calling :meth:`set_name` is ``"W:(A-Za-z)"``.

    .. versionchanged:: 3.1.0
       ``recurse`` argument added.
    """
    if recurse:
        # Apply the flag to every expression yielded by visit_all(); each of
        # those calls is non-recursive.
        for sub_expr in self.visit_all():
            sub_expr.set_debug(flag, recurse=False)
    elif flag:
        # Enabling goes through set_debug_actions, which installs the
        # default actions and sets self.debug.
        self.set_debug_actions(
            _default_start_debug_action,
            _default_success_debug_action,
            _default_exception_debug_action,
        )
    else:
        self.debug = False
    return self

2090

@property
def default_name(self) -> str:
    """
    The auto-generated name for this expression. It is computed with
    ``_generateDefaultName()`` the first time it is needed and then kept in
    ``_defaultName`` until that attribute is reset to ``None``.
    """
    cached_name = self._defaultName
    if cached_name is None:
        cached_name = self._defaultName = self._generateDefaultName()
    return cached_name

2096

@abstractmethod
def _generateDefaultName(self) -> str:
    """
    Build the string that ``default_name`` reports for this expression.

    Abstract: every child class must provide its own implementation.
    """

2102

def set_name(self, name: typing.Optional[str]) -> ParserElement:
    """
    Define name for this expression, makes debugging and exception messages clearer. If
    `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also
    enable debug for this expression.

    If `name` is None, clears any custom name for this expression, and clears the
    debug flag if it was enabled via `__diag__.enable_debug_on_named_expressions`.

    Example:

    .. doctest::

       >>> integer = Word(nums)
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected W:(0-9) (at char 0), (line:1, col:1)

       >>> integer.set_name("integer")
       integer
       >>> integer.parse_string("ABC")
       Traceback (most recent call last):
       ParseException: Expected integer (at char 0), (line:1, col:1)

    .. versionchanged:: 3.1.0
       Accept ``None`` as the ``name`` argument.
    """
    self.customName = name  # type: ignore[assignment]
    # The error message is rebuilt after customName changes so that it picks
    # up the new display name.
    self.errmsg = f"Expected {str(self)}"

    debug_named_expressions = __diag__.enable_debug_on_named_expressions
    if debug_named_expressions:
        # Debugging follows the name: on when a name is set, off when cleared.
        self.set_debug(name is not None)

    return self

2137

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined name when one has been
    set, otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

@name.setter
def name(self, new_name) -> None:
    """Assigning to ``name`` is routed through :meth:`set_name`."""
    self.set_name(new_name)

2146

def __str__(self) -> str:
    """Return the expression's ``name`` as its string form."""
    display_name = self.name
    return display_name

2149

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    text = str(self)
    return text

2152

def streamline(self) -> ParserElement:
    """
    Mark this expression as streamlined and drop the cached default name so
    that it is regenerated the next time it is requested.

    :returns: this expression, to allow call chaining
    """
    self.streamlined = True
    # Clearing the cache forces default_name to be recomputed lazily.
    self._defaultName = None
    return self

2157

def recurse(self) -> list[ParserElement]:
    """Return the sub-expressions of this element; this implementation has none."""
    sub_expressions: list[ParserElement] = []
    return sub_expressions

2160

def _checkRecursion(self, parseElementList):
    """
    Run ``_checkRecursion`` on every element returned by ``recurse()``,
    handing each one a trail made of ``parseElementList`` followed by this
    expression. The caller's list is left unmodified.
    """
    trail = [*parseElementList, self]
    for child in self.recurse():
        child._checkRecursion(trail)

2165

def validate(self, validateTrace=None) -> None:
    """
    .. deprecated:: 3.0.0
       Do not use to check for left recursion.

    Check defined expressions for valid structure, check for infinite recursive definitions.

    Emits a :class:`DeprecationWarning` and then runs ``_checkRecursion``
    starting from an empty list. ``validateTrace`` is accepted but not used.
    """
    deprecation_message = (
        "ParserElement.validate() is deprecated, and should not be used to check for left recursion"
    )
    # stacklevel=2 attributes the warning to the caller of validate().
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    self._checkRecursion([])

2180

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    :param file_or_filename: an object with a ``read()`` method, or a path to open
    :param encoding: text encoding used when a path has to be opened
    :param parse_all: passed on to :meth:`parse_string`; ``parseAll`` is the
       keyword-only legacy spelling, and either one being true enables it
    """
    must_parse_all = parseAll or parse_all

    # Try the argument as a file object first; lacking ``read`` it is
    # treated as a path.
    try:
        stream = typing.cast(TextIO, file_or_filename)
        text = stream.read()
    except AttributeError:
        path = typing.cast(str, file_or_filename)
        with open(path, "r", encoding=encoding) as handle:
            text = handle.read()

    try:
        return self.parse_string(text, must_parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)

        raise

2210

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the very same object compares equal
    - a string compares equal when this expression matches all of it
    - another :class:`ParserElement` compares equal when both have equal
      instance attributes
    - anything else compares unequal
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2219

def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return identity

2222

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    :param test_string: to test against this expression for a match; it is
       converted with ``str()`` before parsing
    :param parse_all: flag to pass to :meth:`parse_string` when running tests;
       ``parseAll`` is the keyword-only legacy spelling, and both must be true
       for the flag to be passed as true
    :returns: ``True`` if parsing succeeds, ``False`` if a
       :class:`ParseBaseException` is raised

    Example:

    .. doctest::

       >>> expr = Word(nums)
       >>> expr.matches("100")
       True
    """
    require_full_match = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_full_match)
    except ParseBaseException:
        return False
    else:
        return True

2247

def run_tests(
    self,
    tests: Union[str, list[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union[ParserElement, str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[
        Callable[[str, ParseResults], typing.Optional[str]]
    ] = None,
) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:

    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    The camelCase keyword-only arguments are legacy spellings of the
    snake_case ones; each pair is combined into a single effective value.

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and the results contain a list of
    ``(test string, ParseResults or Exception)`` pairs, one per test

    Passing example:

    .. testcode::

       number_expr = pyparsing_common.number.copy()

       result = number_expr.run_tests('''
           # unsigned integer
           100
           # negative integer
           -100
           # float with scientific notation
           6.02e23
           ''')
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # unsigned integer
       100
       [100]

       # negative integer
       -100
       [-100]

       # float with scientific notation
       6.02e23
       [6.02e+23]
       Success

    Failure-test example:

    .. testcode::

       result = number_expr.run_tests('''
           # stray character
           100Z
           ''', failure_tests=True)
       print("Success" if result[0] else "Failed!")

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


       # stray character
       100Z
       100Z
          ^
       ParseException: Expected end of text, found 'Z' ...
       FAIL: Expected end of text, found 'Z' ...
       Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this:

    .. testcode::

       expr = Word(alphanums)[1,...]
       expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # Merge each snake_case argument with its legacy camelCase twin.
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse

    if isinstance(tests, str_type):
        # A single multiline string becomes one stripped test per line.
        tests = typing.cast(str, tests)
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]

    comment_specified = comment is not None
    if comment_specified:
        if isinstance(comment, str_type):
            comment = typing.cast(str, comment)
            comment = Literal(comment)
        comment = typing.cast(ParserElement, comment)

    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults: list[tuple[str, Union[ParseResults, Exception]]] = []
    comments: list[str] = []
    success = True
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    nlstr = "\n"
    for t in tests:
        # Comment lines (and blank lines inside a run of comments) are
        # collected and emitted together with the next real test.
        if comment_specified and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            f"{nlstr}{nlstr.join(comments) if comments else ''}",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments.clear()
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else ""
            out.append(pe.explain())
            out.append(f"FAIL: {fatal}{pe}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            success = success and failureTests
            result = pe
        except Exception as exc:
            tag = "FAIL-EXCEPTION"

            # see if this exception was raised in a parse action
            tb = exc.__traceback__
            it = iter(traceback.walk_tb(tb))
            for f, line in it:
                if (f.f_code.co_filename, line) == pa_call_line_synth:
                    # The frame after the synthetic call line is the parse
                    # action itself; guard against it being absent so that
                    # reporting a failure can never raise StopIteration.
                    following = next(it, None)
                    if following is not None:
                        next_f = following[0]
                        tag += f" (raised in parse action {next_f.f_code.co_name!r})"
                    break

            out.append(f"{tag}: {type(exc).__name__}: {exc}")
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # Honor full_dump here as well, consistent with the
                        # other places the parsed result itself is dumped.
                        out.append(result.dump(full=fullDump))
                except Exception as e:
                    out.append(result.dump(full=fullDump))
                    # Not every callable has __name__ (e.g. functools.partial).
                    pp_name = getattr(postParse, "__name__", repr(postParse))
                    out.append(f"{pp_name} failed: {type(e).__name__}: {e}")
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2489

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    embed: bool = False,
    show_hidden: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:

    - ``output_html`` (str or file-like object) - output target for generated
      diagram HTML
    - ``vertical`` (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - ``show_results_names`` - bool flag whether diagram should show annotations for
      defined results names
    - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box
    - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden
    - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed
      the resulting HTML in an enclosing HTML source
    - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code;
      can be used to insert custom CSS styling
    - ``body`` - str containing additional HTML to insert at the beginning of the <BODY> section of the
      generated code

    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.

    .. versionchanged:: 3.1.0
       ``embed`` argument added.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    diagram = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        show_hidden=show_hidden,
        diagram_kwargs=kwargs,
    )

    if isinstance(output_html, (str, Path)):
        # a path was given: open it and write the rendered HTML there
        with open(output_html, "w", encoding="utf-8") as diag_file:
            diag_file.write(railroad_to_html(diagram, embed=embed, **kwargs))
        return

    # we were passed a file-like object, just write to it
    output_html.write(railroad_to_html(diagram, embed=embed, **kwargs))

2551

# Compatibility synonyms
# Each legacy camelCase attribute below is bound to the object returned by
# replaced_by_pep8(<legacy name>, <snake_case callable>). The first six are
# additionally wrapped in staticmethod(); defaultName is a plain alias of the
# default_name property.
# NOTE(review): replaced_by_pep8 is imported from .util and is not visible
# here - presumably it builds a forwarding wrapper for the old name; confirm
# its exact behavior there.
# fmt: off
inlineLiteralsUsing = staticmethod(replaced_by_pep8("inlineLiteralsUsing", inline_literals_using))
setDefaultWhitespaceChars = staticmethod(replaced_by_pep8(
    "setDefaultWhitespaceChars", set_default_whitespace_chars
))
disableMemoization = staticmethod(replaced_by_pep8("disableMemoization", disable_memoization))
enableLeftRecursion = staticmethod(replaced_by_pep8("enableLeftRecursion", enable_left_recursion))
enablePackrat = staticmethod(replaced_by_pep8("enablePackrat", enable_packrat))
resetCache = staticmethod(replaced_by_pep8("resetCache", reset_cache))

# Instance-method synonyms
setResultsName = replaced_by_pep8("setResultsName", set_results_name)
setBreak = replaced_by_pep8("setBreak", set_break)
setParseAction = replaced_by_pep8("setParseAction", set_parse_action)
addParseAction = replaced_by_pep8("addParseAction", add_parse_action)
addCondition = replaced_by_pep8("addCondition", add_condition)
setFailAction = replaced_by_pep8("setFailAction", set_fail_action)
tryParse = replaced_by_pep8("tryParse", try_parse)
parseString = replaced_by_pep8("parseString", parse_string)
scanString = replaced_by_pep8("scanString", scan_string)
transformString = replaced_by_pep8("transformString", transform_string)
searchString = replaced_by_pep8("searchString", search_string)
ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars)
parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs)
setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions)
setDebug = replaced_by_pep8("setDebug", set_debug)
setName = replaced_by_pep8("setName", set_name)
parseFile = replaced_by_pep8("parseFile", parse_file)
runTests = replaced_by_pep8("runTests", run_tests)
canParseNext = replaced_by_pep8("canParseNext", can_parse_next)
defaultName = default_name
# fmt: on

2586

2587

class _PendingSkip(ParserElement):
    # Internal placeholder that holds the spot where '...' was added to a
    # parser element. Once another ParserElement is added to it, the
    # placeholder is replaced with a SkipTo targeting that element.
    def __init__(self, expr: ParserElement, must_skip: bool = False) -> None:
        super().__init__()
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self) -> str:
        # Render "<anchor> ..." by borrowing the name of "<anchor> Empty".
        combined_name = str(self.anchor + Empty())
        return combined_name.replace("Empty", "...")

    def __add__(self, other) -> ParserElement:
        # Skipped text is collected under the "_skipped" results name.
        skipper = SkipTo(other).set_name("...")("_skipped*")

        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # anchor matched and nothing was skipped: drop the empty token
            # and the "_skipped" entry
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor did not match and nothing was skipped: report the anchor
            # as missing
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = f"missing <{self.anchor!r}>"

        anchored = self.anchor + skipper().add_parse_action(must_skip)
        unanchored = skipper().add_parse_action(show_skip)
        return (anchored | unanchored) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args) -> ParseImplReturnType:
        # A placeholder that was never combined with a target cannot parse.
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2627

2628

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass that serves as the base for
    atomic matching patterns.
    """

    def __init__(self) -> None:
        super().__init__(savelist=False)

    def _generateDefaultName(self) -> str:
        # The default name is simply the name of the concrete class.
        concrete_class = type(self)
        return concrete_class.__name__

2639

2640

class NoMatch(Token):
    """
    A token that never matches: every parse attempt raises a
    :class:`ParseException`.
    """

    def __init__(self) -> None:
        super().__init__()
        self._may_return_empty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Fail unconditionally at the current location.
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2654

2655

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example:

    .. doctest::

       >>> Literal('abc').parse_string('abc')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('abcdef')
       ParseResults(['abc'], {})
       >>> Literal('abc').parse_string('ab')
       Traceback (most recent call last):
       ParseException: Expected 'abc', found 'ab' (at char 0), (line: 1, col: 1)

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __new__(cls, match_string: str = "", *, matchString: str = ""):
        # Performance tuning: when Literal itself is instantiated, pick a
        # subclass with an optimized parseImpl for empty and single-character
        # strings. Subclasses are always created as requested.
        target_cls = cls
        if cls is Literal:
            text = matchString or match_string
            if not text:
                target_cls = Empty
            elif len(text) == 1:
                target_cls = _SingleCharLiteral

        return super().__new__(target_cls)

    # Needed to make copy.copy() work correctly if we customize __new__
    def __getnewargs__(self):
        return (self.match,)

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        super().__init__()
        # ``matchString`` is the legacy keyword and wins when it is non-empty.
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        self.firstMatchChar = text[:1]
        # self.name is derived from self.match, so this must come after it.
        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = False
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return repr(self.match)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Compare the first character before calling startswith.
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2713

2714

class Empty(Literal):
    """
    An empty token that always matches, consuming no input and producing no
    tokens.
    """

    def __init__(self, match_string="", *, matchString="") -> None:
        # Both arguments are accepted for signature compatibility with
        # Literal; the match string is always the empty string.
        super().__init__("")
        self._may_return_empty = True
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        return "Empty"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # Location is unchanged and the token list is empty.
        no_tokens: list = []
        return loc, no_tokens

2730

2731

class _SingleCharLiteral(Literal):
    # Literal variant for one-character match strings: parseImpl only
    # compares a single character.
    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2737

2738

# Store Literal in ParserElement._literalStringClass now that Literal exists.
# NOTE(review): presumably this is the class used to promote plain strings to
# parser elements - confirm against the uses of _literalStringClass.
ParserElement._literalStringClass = Literal

2740

2741

2742class Keyword(Token):

2743 """

2744 Token to exactly match a specified string as a keyword, that is,

2745 it must be immediately preceded and followed by whitespace or

2746 non-keyword characters. Compare with :class:`Literal`:

2747

2748 - ``Literal("if")`` will match the leading ``'if'`` in

2749 ``'ifAndOnlyIf'``.

2750 - ``Keyword("if")`` will not; it will only match the leading

2751 ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

2752

2753 Accepts two optional constructor arguments in addition to the

2754 keyword string:

2755

2756 - ``ident_chars`` is a string of characters that would be valid

2757 identifier characters, defaulting to all alphanumerics + "_" and

2758 "$"

2759 - ``caseless`` allows case-insensitive matching, default is ``False``.

2760

2761 Example:

2762

2763 .. doctest::

2764 :options: +NORMALIZE_WHITESPACE

2765

2766 >>> Keyword("start").parse_string("start")

2767 ParseResults(['start'], {})

2768 >>> Keyword("start").parse_string("starting")

2769 Traceback (most recent call last):

2770 ParseException: Expected Keyword 'start', keyword was immediately

2771 followed by keyword character, found 'ing' (at char 5), (line:1, col:6)

2772

2773 .. doctest::

2774 :options: +NORMALIZE_WHITESPACE

2775

2776 >>> Keyword("start").parse_string("starting").debug()

2777 Traceback (most recent call last):

2778 ParseException: Expected Keyword "start", keyword was immediately

2779 followed by keyword character, found 'ing' ...

2780

2781 For case-insensitive matching, use :class:`CaselessKeyword`.

2782 """

2783

2784 DEFAULT_KEYWORD_CHARS = alphanums + "_$"

2785

def __init__(
    self,
    match_string: str = "",
    ident_chars: typing.Optional[str] = None,
    caseless: bool = False,
    *,
    matchString: str = "",
    identChars: typing.Optional[str] = None,
) -> None:
    """
    Build a keyword matcher for ``match_string``.

    ``matchString`` and ``identChars`` are the pre-PEP8 spellings of
    ``match_string`` and ``ident_chars``; a truthy legacy value takes
    precedence over its snake_case counterpart.

    :raises ValueError: if the resulting match string is empty.
    """
    super().__init__()

    # Resolve the legacy/PEP8 argument pairs, falling back to the class-wide
    # default only when no keyword characters were given at all.
    keyword_chars = identChars or ident_chars
    if keyword_chars is None:
        keyword_chars = Keyword.DEFAULT_KEYWORD_CHARS
    text = matchString or match_string

    self.match = text
    self.matchLen = len(text)
    self.firstMatchChar = text[:1]
    if not self.firstMatchChar:
        raise ValueError("null string passed to Keyword; use Empty() instead")

    # self.name is derived from self.match, so it must be read after the
    # match text has been stored.
    self.errmsg = f"Expected {type(self).__name__} {self.name}"
    self._may_return_empty = False
    self.mayIndexError = False
    self.caseless = caseless
    if caseless:
        # Caseless comparison is done in upper case on both sides, so the
        # keyword characters are upper-cased to match.
        self.caselessmatch = text.upper()
        keyword_chars = keyword_chars.upper()
    self.identChars = set(keyword_chars)

2813

2814 def _generateDefaultName(self) -> str:

2815 return repr(self.match)

2816

def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
    """
    Match the keyword text at ``loc``, requiring that it is not directly
    preceded or followed by a keyword character.

    :param instring: the full input string being parsed
    :param loc: index in ``instring`` at which to attempt the match
    :param do_actions: unused by this implementation
    :returns: ``(new_loc, matched_keyword)`` where ``new_loc`` is the index
        just past the keyword and ``matched_keyword`` is ``self.match``
    :raises ParseException: if the text does not match, or if it matches
        but touches a keyword character on either side; in the latter case
        the exception location points at the offending character
    """
    errmsg = self.errmsg or ""
    errloc = loc
    end = loc + self.matchLen
    caseless = self.caseless
    ident_chars = self.identChars

    if caseless:
        text_matches = instring[loc:end].upper() == self.caselessmatch
    else:
        # single-character keywords are compared directly; anything longer
        # goes through startswith
        text_matches = (
            instring[loc] == self.firstMatchChar
            and self.matchLen == 1
            or instring.startswith(self.match, loc)
        )

    if text_matches:

        def is_keyword_char(ch) -> bool:
            # identChars was upper-cased in __init__ for caseless keywords,
            # so the neighbouring character must be upper-cased as well
            return (ch.upper() if caseless else ch) in ident_chars

        if loc > 0 and is_keyword_char(instring[loc - 1]):
            # preceded by keyword char
            errmsg += ", keyword was immediately preceded by keyword character"
            errloc = loc - 1
        elif end < len(instring) and is_keyword_char(instring[end]):
            # followed by keyword char; same wording for the caseless and
            # the case-sensitive keyword
            errmsg += ", keyword was immediately followed by keyword character"
            errloc = end
        else:
            return end, self.match
    # else no match, just raise the plain exception

    raise ParseException(instring, errloc, errmsg, self)

2860

@staticmethod
def set_default_keyword_chars(chars) -> None:
    """
    Overrides the default characters used by :class:`Keyword` expressions.

    The new value is stored in ``Keyword.DEFAULT_KEYWORD_CHARS``, which the
    constructor consults only when no ``ident_chars`` argument is supplied.
    """
    Keyword.DEFAULT_KEYWORD_CHARS = chars

2867

# Compatibility synonyms
# Pre-PEP8 camelCase alias for set_default_keyword_chars, built with the
# replaced_by_pep8 helper and re-wrapped as a staticmethod.
setDefaultKeywordChars = staticmethod(
    replaced_by_pep8("setDefaultKeywordChars", set_default_keyword_chars)
)

2872

2873

class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.

    Note: the token returned on a match is always spelled exactly as the
    match string was given to the constructor, NOT as it appeared in the
    input text.

    Example:

    .. doctest::

       >>> CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = "") -> None:
        """
        ``matchString`` is the pre-PEP8 spelling of ``match_string`` and wins
        when it is non-empty.
        """
        literal_text = matchString or match_string
        # The base class stores the upper-cased text; comparisons in
        # parseImpl are made against upper-cased input.
        super().__init__(literal_text.upper())
        # Preserve the defining literal.
        self.returnString = literal_text
        self.errmsg = f"Expected {self.name}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the upper-cased input slice at ``loc`` with the stored
        upper-cased literal; on success return the position after the match
        together with the literal in its original spelling.
        """
        stop = loc + self.matchLen
        if instring[loc:stop].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return stop, self.returnString

2901

2902

class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`.

    Example:

    .. doctest::

       >>> CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
       ParseResults(['CMD', 'CMD'], {})

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ) -> None:
        """
        Delegate to :class:`Keyword` with ``caseless=True``.

        ``matchString`` / ``identChars`` are the pre-PEP8 spellings; a truthy
        legacy value takes precedence over its snake_case counterpart.
        """
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )

2928

2929

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when
      comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example:

    .. doctest::
       :options: +NORMALIZE_WHITESPACE

       >>> patt = CloseMatch("ATCATCGAATGGA")
       >>> patt.parse_string("ATCATCGAAXGGA")
       ParseResults(['ATCATCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [9]})

       >>> patt.parse_string("ATCAXCGAAXGGA")
       Traceback (most recent call last):
       ParseException: Expected 'ATCATCGAATGGA' (with up to 1 mismatches),
       found 'ATCAXCGAAXGGA' (at char 0), (line:1, col:1)

       # exact match
       >>> patt.parse_string("ATCATCGAATGGA")
       ParseResults(['ATCATCGAATGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': []})

       # close match allowing up to 2 mismatches
       >>> patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
       >>> patt.parse_string("ATCAXCGAAXGGA")
       ParseResults(['ATCAXCGAAXGGA'],
       {'original': 'ATCATCGAATGGA', 'mismatches': [4, 9]})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: typing.Optional[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ) -> None:
        """
        ``maxMismatches`` is the pre-PEP8 spelling of ``max_mismatches``; it
        is only used when ``max_mismatches`` is left as ``None``.
        """
        allowed = maxMismatches if max_mismatches is None else max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = allowed
        self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)"
        self.caseless = caseless
        self.mayIndexError = False
        self._may_return_empty = False

    def _generateDefaultName(self) -> str:
        """Default name is the class name followed by the quoted target."""
        return f"{type(self).__name__}:{self.match_string!r}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Compare the input at ``loc`` with the target character by character,
        recording the positions that differ.

        Fails if the remaining input is shorter than the target or if the
        number of differing positions exceeds ``maxMismatches``; otherwise
        returns the new location and a :class:`ParseResults` carrying the
        matched text plus the ``original`` and ``mismatches`` named results.
        """
        begin = loc
        target = self.match_string
        end = begin + len(target)

        # not enough input left to hold the whole target
        if end > len(instring):
            raise ParseException(instring, loc, self.errmsg, self)

        limit = self.maxMismatches
        ignore_case = self.caseless
        differing = []
        index = 0

        for index, (found, wanted) in enumerate(zip(instring[begin:end], target)):
            if ignore_case:
                found, wanted = found.lower(), wanted.lower()

            if found == wanted:
                continue

            differing.append(index)
            if len(differing) > limit:
                # too many differences - give up at the original location
                raise ParseException(instring, loc, self.errmsg, self)

        loc = begin + index + 1
        results = ParseResults([instring[begin:loc]])
        results["original"] = target
        results["mismatches"] = differing
        return loc, results

3028

3029

class Word(Token):
    """Token for matching words composed of allowed character sets.

    Parameters:

    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0)
    - ``exact`` - exact number of characters to match (default=0)
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :attr:`alphas`
    - :attr:`nums`
    - :attr:`alphanums`
    - :attr:`hexnums`
    - :attr:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :attr:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :attr:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example:

    .. testcode::

       # a word composed of digits
       integer = Word(nums)
       # Two equivalent alternate forms:
       Word("0123456789")
       Word(srange("[0-9]"))

       # a word with a leading capital, and zero or more lowercase
       capitalized_word = Word(alphas.upper(), alphas.lower())

       # hostnames are alphanumeric, with leading alpha, and '-'
       hostname = Word(alphas, alphanums + '-')

       # roman numeral
       # (not a strict parser, accepts invalid mix of characters)
       roman = Word("IVXLCDM")

       # any string of non-whitespace characters, except for ','
       csv_value = Word(printables, exclude_chars=",")

    :raises ValueError: If ``min`` and ``max`` are both specified
        and the test ``min <= max`` fails.

    .. versionchanged:: 3.1.0
       Raises :exc:`ValueError` if ``min`` > ``max``.
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: typing.Optional[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        initChars: typing.Optional[str] = None,
        bodyChars: typing.Optional[str] = None,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        """
        Build the character sets and length limits and, where possible, an
        equivalent compiled regular expression used as a fast path.

        The camelCase keyword arguments are the pre-PEP8 spellings; a truthy
        legacy value takes precedence over its snake_case counterpart.

        :raises ValueError: if the initial character set is empty, if
            ``min < 1``, or if ``max`` is given and ``min > max``.
        """
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                f"invalid {type(self).__name__}, initChars cannot be empty string"
            )

        initChars_set = set(initChars)
        if excludeChars:
            # excluded characters are removed from both character sets
            excludeChars_set = set(excludeChars)
            initChars_set -= excludeChars_set
            if bodyChars:
                bodyChars = "".join(set(bodyChars) - excludeChars_set)
        self.initChars = initChars_set
        self.initCharsOrig = "".join(sorted(initChars_set))

        if bodyChars:
            self.bodyChars = set(bodyChars)
            self.bodyCharsOrig = "".join(sorted(bodyChars))
        else:
            # no separate body set - body characters are the initial ones
            self.bodyChars = initChars_set
            self.bodyCharsOrig = self.initCharsOrig

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        if self.maxSpecified and min > max:
            raise ValueError(
                f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            # an exact length overrides any min/max that were given
            min = max = exact
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asKeyword = asKeyword
        if self.asKeyword:
            self.errmsg += " as a keyword"

        # see if we can make a regex for this Word
        if " " not in (self.initChars | self.bodyChars):
            if len(self.initChars) == 1:
                re_leading_fragment = re.escape(self.initCharsOrig)
            else:
                re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]"

            if self.bodyChars == self.initChars:
                if max == 0 and self.minLen == 1:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    if self.minLen != self.maxLen:
                        repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}"
                    else:
                        repeat = f"{{{self.minLen}}}"
                self.reString = f"{re_leading_fragment}{repeat}"
            else:
                if max == 1:
                    re_body_fragment = ""
                    repeat = ""
                else:
                    re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]"
                    # the leading fragment accounts for one character, so
                    # the body repeat counts are one less than min/max
                    if max == 0 and self.minLen == 1:
                        repeat = "*"
                    elif max == 2:
                        repeat = "?" if min <= 1 else ""
                    else:
                        if min != max:
                            repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}"
                        else:
                            repeat = f"{{{min - 1 if min > 0 else ''}}}"

                self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}"

            if self.asKeyword:
                self.reString = rf"\b{self.reString}\b"

            try:
                self.re = re.compile(self.reString)
            except re.error:
                # no usable regex - keep the character-by-character parseImpl
                self.re = None  # type: ignore[assignment]
            else:
                self.re_match = self.re.match
                self.parseImpl = self.parseImpl_regex  # type: ignore[method-assign]

    def copy(self) -> Word:
        """
        Return a copy of this expression.

        When this instance was switched to the regex fast path in
        ``__init__``, the copy's ``parseImpl`` is re-pointed at the copy's
        own ``parseImpl_regex``. An instance without a compiled regex has no
        ``re_match`` attribute, so its copy must keep the character-scanning
        ``parseImpl``; rebinding it unconditionally would make every copy of
        such a Word fail with ``AttributeError`` when parsing.
        """
        ret: Word = cast(Word, super().copy())
        if hasattr(self, "re_match"):
            ret.parseImpl = ret.parseImpl_regex  # type: ignore[method-assign]
        return ret

    def _generateDefaultName(self) -> str:
        """
        Build a default name of the form ``W:(init[, body])`` followed by a
        length specification when the lengths differ from the defaults.
        """

        def charsAsStr(s):
            # abbreviate long character sets so names stay readable
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)

            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."

            return s

        if self.initChars != self.bodyChars:
            base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})"
        else:
            base = f"W:({charsAsStr(self.initChars)})"

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    # single-character word: drop the leading "W:"
                    return base[2:]
                else:
                    return base + f"{{{self.minLen}}}"
            elif self.maxLen == _MAX_INT:
                return base + f"{{{self.minLen},...}}"
            else:
                return base + f"{{{self.minLen},{self.maxLen}}}"
        return base

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Character-scanning implementation, used when no regex was compiled.

        :returns: ``(new_loc, matched_text)``
        :raises ParseException: if the first character is not an initial
            character, the match is shorter than ``minLen``, a specified
            maximum is followed by another body character, or (for
            ``as_keyword``) the match touches a body character on either side
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        body_chars: set[str] = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in body_chars:
            loc += 1

        throw_exception = False
        if loc - start < self.minLen:
            throw_exception = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in body_chars:
            throw_exception = True
        elif self.asKeyword and (
            (start > 0 and instring[start - 1] in body_chars)
            or (loc < instrlen and instring[loc] in body_chars)
        ):
            throw_exception = True

        if throw_exception:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def parseImpl_regex(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Regex fast path installed by ``__init__`` when a pattern compiled.

        :returns: ``(new_loc, matched_text)``
        :raises ParseException: if the compiled pattern does not match at
            ``loc``
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        return loc, result.group()

3296

3297

class Char(Word):
    """Convenience class equivalent to :class:`Word` ``(characters, exact=1)``,
    for matching exactly one character drawn from a string of
    characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ) -> None:
        """
        ``asKeyword`` / ``excludeChars`` are the pre-PEP8 spellings; a truthy
        legacy value takes precedence over its snake_case counterpart.
        """
        super().__init__(
            charset,
            exact=1,
            as_keyword=asKeyword or as_keyword,
            exclude_chars=excludeChars or exclude_chars,
        )

3318

3319

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    The parameters ``pattern`` and ``flags`` are passed
    to the ``re.compile()`` function as-is. See the Python
    `re module <https://docs.python.org/3/library/re.html>`_ documentation for an
    explanation of the acceptable patterns and flags.

    Example:

    .. testcode::

       realnum = Regex(r"[+-]?\d+\.\d*")
       # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
       roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

       # named fields in a regex will be returned as named results
       date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

       # the Regex class will accept regular expressions compiled using the
       # re module
       import re
       parser = pp.Regex(re.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ) -> None:
        """
        Accept a pattern string, an already compiled pattern object (anything
        with ``pattern`` and ``match`` attributes), or a zero-argument
        callable that produces one of those when the regex is first needed.

        ``asGroupList`` / ``asMatch`` are the pre-PEP8 spellings of
        ``as_group_list`` / ``as_match``.

        :raises ValueError: if ``pattern`` is an empty string
        :raises TypeError: if ``pattern`` is none of the accepted kinds
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first read
            self._re = None
            self._may_return_empty = None  # type: ignore [assignment]
            self.reString = self.pattern = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self.pattern = self.reString = pattern.pattern

        elif callable(pattern):
            # defer creating this pattern until we really need it
            self.pattern = pattern
            self._may_return_empty = None  # type: ignore [assignment]
            self._re = None

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object,"
                " or a callable that takes no arguments and returns a string or a"
                " compiled RE object"
            )

        self.flags = flags
        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # as_match takes precedence when both options are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList  # type: ignore [method-assign]
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch  # type: ignore [method-assign]

    def copy(self):
        """
        Return a copy of this expression, re-pointing an overridden
        ``parseImpl`` at the copy's own bound method instead of the
        original's.
        """
        ret: Regex = cast(Regex, super().copy())
        if self.asGroupList:
            ret.parseImpl = ret.parseImplAsGroupList
        if self.asMatch:
            ret.parseImpl = ret.parseImplAsMatch
        return ret

    @cached_property
    def re(self) -> re.Pattern:
        """
        The compiled pattern, created on first access.

        :raises ValueError: if the pattern string cannot be compiled
        """
        if self._re:
            return self._re

        if callable(self.pattern):
            # replace self.pattern with the string returned by calling self.pattern()
            self.pattern = cast(Callable[[], str], self.pattern)()

            # see if we got a compiled RE back instead of a str - if so, we're done
            if hasattr(self.pattern, "pattern") and hasattr(self.pattern, "match"):
                self._re = cast(re.Pattern[str], self.pattern)
                self.pattern = self.reString = self._re.pattern
                return self._re

        try:
            self._re = re.compile(self.pattern, self.flags)
        except re.error as exc:
            raise ValueError(
                f"invalid pattern ({self.pattern!r}) passed to Regex"
            ) from exc
        else:
            # use the freshly compiled object directly rather than
            # re-entering this property while it is still being computed
            self._may_return_empty = self._re.match("", pos=0) is not None
            return self._re

    @cached_property
    def re_match(self) -> Callable[[str, int], Any]:
        """The ``match`` method of the compiled pattern."""
        return self.re.match

    @property
    def mayReturnEmpty(self):
        """
        Whether the pattern can match the empty string.

        Computed lazily. The ``re`` property only sets the flag when it
        compiles a pattern string itself, so for a precompiled pattern (or a
        callable that returned one) the flag is derived here; otherwise this
        property would return ``None`` for such patterns.
        """
        if self._may_return_empty is None:
            # force compile of regex pattern, to set may_return_empty flag
            compiled = self.re
            if self._may_return_empty is None:
                self._may_return_empty = compiled.match("") is not None
        return self._may_return_empty

    @mayReturnEmpty.setter
    def mayReturnEmpty(self, value):
        self._may_return_empty = value

    def _generateDefaultName(self) -> str:
        """Default name is ``Re:(<pattern repr>)`` with doubled backslashes collapsed."""
        unescaped = repr(self.pattern).replace("\\\\", "\\")
        return f"Re:({unescaped})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match the pattern at ``loc`` and return the matched text as
        :class:`ParseResults`, with any named groups added as named results.

        :raises ParseException: if the pattern does not match at ``loc``
        """
        # explicit check for matching past the length of the string;
        # this is done because the re module will not complain about
        # a match with `pos > len(instring)`, it will just return ""
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        d = result.groupdict()

        for k, v in d.items():
            ret[k] = v

        return loc, ret

    def parseImplAsGroupList(self, instring, loc, do_actions=True):
        """
        Variant used for ``as_group_list=True``: returns the tuple from
        ``match.groups()`` instead of the matched text.
        """
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, do_actions=True):
        """
        Variant used for ``as_match=True``: returns the match object itself.
        """
        if loc > len(instring) and self.mayReturnEmpty:
            raise ParseException(instring, loc, self.errmsg, self)

        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Example:

        .. testcode::

           make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
           print(make_html.transform_string("h1:main title:"))

        .. testoutput::

           <h1>main title</h1>

        :raises TypeError: if this expression was built with
            ``as_group_list=True``, or with ``as_match=True`` and ``repl``
            is a callable
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(as_group_list=True)")

        if self.asMatch and callable(repl):
            raise TypeError(
                "cannot use sub() with a callable with Regex(as_match=True)"
            )

        if self.asMatch:

            # tokens[0] is the match object, so expand the template on it

            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3532

3533

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to re_escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to re_escape an embedded quote
      string (such as SQL's ``""`` to re_escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    .. caution:: ``convert_whitespace_escapes`` has no effect if
       ``unquote_results`` is ``False``.

    Example:

    .. doctest::

       >>> qs = QuotedString('"')
       >>> print(qs.search_string('lsjdf "This is the quote" sldjf'))
       [['This is the quote']]
       >>> complex_qs = QuotedString('{{', end_quote_char='}}')
       >>> print(complex_qs.search_string(
       ...     'lsjdf {{This is the "quote"}} sldjf'))
       [['This is the "quote"']]
       >>> sql_qs = QuotedString('"', esc_quote='""')
       >>> print(sql_qs.search_string(
       ...     'lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
       [['This is the quote with "embedded" quotes']]
    """

    # escaped whitespace sequences (as they appear in the input text) mapped
    # to the whitespace character each one stands for
    ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ) -> None:
        """
        Build the regular expression that recognizes the quoted string and,
        when results are to be unquoted, the scanner used to decode escapes.

        The camelCase keyword arguments are the pre-PEP8 spellings of the
        snake_case ones. For the two boolean options the values are AND-ed,
        so passing ``False`` under either spelling disables the option.

        :raises ValueError: if the opening or closing quote is empty after
            stripping whitespace, or if the generated pattern fails to compile
        """
        super().__init__()
        esc_char = escChar or esc_char
        esc_quote = escQuote or esc_quote
        unquote_results = unquoteResults and unquote_results
        end_quote_char = endQuoteChar or end_quote_char
        convert_whitespace_escapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )
        quote_char = quoteChar or quote_char

        # remove white space from quote chars
        quote_char = quote_char.strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if end_quote_char is None:
            end_quote_char = quote_char
        else:
            end_quote_char = end_quote_char.strip()
            if not end_quote_char:
                raise ValueError("end_quote_char cannot be the empty string")

        self.quote_char: str = quote_char
        self.quote_char_len: int = len(quote_char)
        self.first_quote_char: str = quote_char[0]
        self.end_quote_char: str = end_quote_char
        self.end_quote_char_len: int = len(end_quote_char)
        self.esc_char: str = esc_char or ""
        self.has_esc_char: bool = esc_char is not None
        self.esc_quote: str = esc_quote or ""
        self.unquote_results: bool = unquote_results
        self.convert_whitespace_escapes: bool = convert_whitespace_escapes
        self.multiline = multiline
        self.re_flags = re.RegexFlag(0)

        # fmt: off
        # build up re pattern for the content between the quote delimiters
        inner_pattern: list[str] = []

        if esc_quote:
            inner_pattern.append(rf"(?:{re.escape(esc_quote)})")

        if esc_char:
            inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")

        if len(self.end_quote_char) > 1:
            # allow any proper prefix of a multi-character end quote as long
            # as it is not followed by the rest of the end quote
            inner_pattern.append(
                "(?:"
                + "|".join(
                    f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
                    for i in range(len(self.end_quote_char) - 1, 0, -1)
                )
                + ")"
            )

        if self.multiline:
            self.re_flags |= re.MULTILINE | re.DOTALL
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )
        else:
            # single-line strings additionally exclude line-ending characters
            inner_pattern.append(
                rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
                rf"{(_escape_regex_range_chars(self.esc_char) if self.has_esc_char else '')}])"
            )

        self.pattern = "".join(
            [
                re.escape(self.quote_char),
                "(?:",
                '|'.join(inner_pattern),
                ")*",
                re.escape(self.end_quote_char),
            ]
        )

        if self.unquote_results:
            if self.convert_whitespace_escapes:
                self.unquote_scan_re = re.compile(
                    rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
                    # plain raw string, NOT an f-string: inside an f-string
                    # the {3}/{2}/{4} repeat counts would be evaluated as
                    # replacement fields and collapse to the literal digits
                    # 3, 2 and 4, so octal/hex/unicode escapes would not be
                    # recognized
                    r"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})"
                    rf"|({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags,
                )
            else:
                self.unquote_scan_re = re.compile(
                    rf"({re.escape(self.esc_char)}.)"
                    rf"|(\n|.)",
                    flags=self.re_flags
                )
        # fmt: on

        try:
            self.re = re.compile(self.pattern, self.re_flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex")

        self.errmsg = f"Expected {self.name}"
        self.mayIndexError = False
        self._may_return_empty = True

    def _generateDefaultName(self) -> str:
        """Describe the expression by its delimiter(s)."""
        if self.quote_char == self.end_quote_char and isinstance(
            self.quote_char, str_type
        ):
            return f"string enclosed in {self.quote_char!r}"

        return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """
        Match a quoted string at ``loc``.

        :returns: ``(new_loc, text)``; ``text`` keeps its delimiters and
            escapes when ``unquote_results`` is false, otherwise the
            delimiters are stripped and escape sequences are decoded
        :raises ParseException: if no quoted string starts at ``loc``
        """
        # check first character of opening quote to see if that is a match
        # before doing the more complicated regex match
        result = (
            instring[loc] == self.first_quote_char
            and self.re_match(instring, loc)
            or None
        )
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        # get ending loc and matched string from regex matching result
        loc = result.end()
        ret = result.group()

        def convert_escaped_numerics(s: str) -> str:
            # s is the escape sequence without its leading backslash
            if s == "0":
                return "\0"
            if s.isdigit() and len(s) == 3:
                return chr(int(s, base=8))
            elif s.startswith(("u", "x")):
                return chr(int(s[1:], base=16))
            else:
                return s

        if self.unquote_results:
            # strip off quotes
            ret = ret[self.quote_char_len : -self.end_quote_char_len]

            if isinstance(ret, str_type):
                # fmt: off
                if self.convert_whitespace_escapes:
                    # as we iterate over matches in the input string,
                    # collect from whichever match group of the unquote_scan_re
                    # regex matches (only 1 group will match at any given time)
                    ret = "".join(
                        # match group 1 matches \t, \n, etc.
                        self.ws_map[match.group(1)] if match.group(1)
                        # match group 2 matches escaped octal, null, hex, and Unicode
                        # sequences
                        else convert_escaped_numerics(match.group(2)[1:]) if match.group(2)
                        # match group 3 matches escaped characters
                        else match.group(3)[-1] if match.group(3)
                        # match group 4 matches any character
                        else match.group(4)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                else:
                    ret = "".join(
                        # match group 1 matches escaped characters
                        match.group(1)[-1] if match.group(1)
                        # match group 2 matches any character
                        else match.group(2)
                        for match in self.unquote_scan_re.finditer(ret)
                    )
                # fmt: on

                # replace escaped quotes
                if self.esc_quote:
                    ret = ret.replace(self.esc_quote, self.end_quote_char)

        return loc, ret

3772

3773

class CharsNotIn(Token):
    """Token matching a run of characters that are *not* members of a
    given exclusion set. Whitespace is part of the match unless it is
    listed among the excluded characters (see example).

    The token is defined by a string of disallowed characters plus
    optional ``min``, ``max`` and ``exact`` length limits. ``min``
    defaults to 1 and may not be smaller than 1; ``max`` and ``exact``
    default to 0, which means "no maximum" / "no exact length".

    Example:

    .. testcode::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(
            DelimitedList(csv_value).parse_string(
                "dkls,lsdkjf,s12 34,@!#,213"
            )
        )

    prints:

    .. testoutput::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ) -> None:
        super().__init__()
        self.skipWhitespace = False
        # ``notChars`` is the legacy keyword spelling of ``not_chars``
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use"
                " Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = f"Expected {self.name}"
        self._may_return_empty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self) -> str:
        # the collapsed-range form is only used to decide whether to
        # abbreviate; the name itself is built from the raw characters
        collapsed = _collapse_string_to_ranges(self.notChars)
        shown = self.notChars
        if len(collapsed) > 16:
            shown = f"{self.notChars[: 16 - 3]}..."
        return f"!W:({shown})"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))

        # the first character is already known to be acceptable
        end = begin + 1
        while end < limit and instring[end] not in excluded:
            end += 1

        if end - begin < self.minLen:
            raise ParseException(instring, end, self.errmsg, self)

        return end, instring[begin:end]

3860

3861

class White(Token):
    """Matching class for significant whitespace. Pyparsing grammars
    normally skip whitespace; use this class when a whitespace structure
    must be matched explicitly. It is defined by a string of the
    whitespace characters to match (default ``" \\t\\r\\n"``) and accepts
    optional ``min``, ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display labels used when building the default name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(
        self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0
    ) -> None:
        super().__init__()
        self.matchWhite = ws

        # skip only those known whitespace characters that are not
        # themselves being matched by this expression
        skippable = [c for c in self.whiteStrs if c not in self.matchWhite]
        self.set_whitespace_chars("".join(skippable), copy_defaults=True)
        # self.leave_whitespace()
        self._may_return_empty = True
        self.errmsg = f"Expected {self.name}"

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def _generateDefaultName(self) -> str:
        labels = [White.whiteStrs[ch] for ch in self.matchWhite]
        return "".join(labels)

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))

        # the first character has already been accepted above
        end = begin + 1
        while end < limit and instring[end] in wanted:
            end += 1

        if end - begin < self.minLen:
            raise ParseException(instring, end, self.errmsg, self)

        return end, instring[begin:end]

3939

3940

class PositionToken(Token):
    """Common base for the position-oriented tokens defined below
    (column, line, string and word boundaries).

    Sets the ``_may_return_empty`` flag and clears ``mayIndexError``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True

3946

3947

class GoToColumn(PositionToken):
    """Token that advances to a given column of the input text; handy
    when scraping tabular reports.
    """

    def __init__(self, colno: int) -> None:
        super().__init__()
        self.col = colno

    def preParse(self, instring: str, loc: int) -> int:
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)

        # step over whitespace until the target column or non-space text
        while loc < end:
            if not instring[loc].isspace():
                break
            if col(loc, instring) == target:
                break
            loc += 1

        return loc

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)

        # consume whatever lies between here and the target column
        target_loc = loc + self.col - current
        return target_loc, instring[loc:target_loc]

3980

3981

class LineStart(PositionToken):
    r"""Matches when the current position is at the start of a line
    within the parse string

    Example:

    .. testcode::

        test = '''\
        AAA this line
        AAA and this line
        AAA and even this line
        B AAA but definitely not this line
        '''

        for t in (LineStart() + 'AAA' + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['AAA', ' this line']
        ['AAA', ' and this line']
        ['AAA', ' and even this line']

    """

    def __init__(self) -> None:
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper expression used to skip non-newline whitespace
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.set_name("start of line")

    def preParse(self, instring: str, loc: int) -> int:
        if loc == 0:
            return loc

        skip = self.skipper.preParse
        pos = skip(instring, loc)

        if "\n" not in self.orig_whiteChars:
            return pos

        # newline was originally skippable: step over each newline and
        # whatever other whitespace follows it
        while instring[pos : pos + 1] == "\n":
            pos = skip(instring, pos + 1)

        return pos

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4034

4035

class LineEnd(PositionToken):
    """Matches when the current position is at the end of a line within
    the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        # newline must not be skipped as whitespace, it is what we match
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.set_name("end of line")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)

        # end of input counts as end of line, with no token produced
        if loc == end:
            return loc + 1, []

        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"

        raise ParseException(instring, loc, self.errmsg, self)

4057

4058

class StringStart(PositionToken):
    """Matches when the current position is at the start of the parse
    string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("start of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # position 0 always qualifies; any other position qualifies only
        # if everything before it is whitespace and ignorables
        if loc == 0 or loc == self.preParse(instring, 0):
            return loc, []

        raise ParseException(instring, loc, self.errmsg, self)

4074

4075

class StringEnd(PositionToken):
    """
    Matches when the current position is at the end of the parse string
    """

    def __init__(self) -> None:
        super().__init__()
        self.set_name("end of text")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        end = len(instring)

        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)

        if loc == end:
            # exactly at the end: step one past it
            return loc + 1, []

        # already past the end: stay where we are
        return loc, []

4094

4095

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    # NOTE: the docstring above is a raw string so that ``\b`` is kept as
    # the two characters backslash + "b" instead of a backspace character.

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # ``wordChars`` is the legacy keyword; it wins only when it was
        # given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.set_name("start of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # position 0 always matches; elsewhere the previous character must
        # be outside the word set and the current character inside it
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4122

4123

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    # NOTE: the docstring above is a raw string so that ``\b`` is kept as
    # the two characters backslash + "b" instead of a backspace character.

    def __init__(
        self, word_chars: str = printables, *, wordChars: str = printables
    ) -> None:
        # ``wordChars`` is the legacy keyword; it wins only when it was
        # given a non-default value
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.set_name("end of a word")

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # At loc == 0 there is no preceding character, so no word can
            # end here. Checking this explicitly avoids ``instring[-1]``
            # wrapping around to the last character of the input, which
            # previously made the result depend on unrelated text.
            if (
                instring[loc] in self.wordChars
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

4151

4152

class Tag(Token):
    """
    A meta-element that inserts a named result into the parsed tokens,
    so that it can be checked later in a parse action or while
    processing the parsed results. Takes the tag name and an optional
    tag value, which defaults to `True`.

    Example:

    .. doctest::

        >>> end_punc = "." | ("!" + Tag("enthusiastic"))
        >>> greeting = "Hello," + Word(alphas) + end_punc

        >>> result = greeting.parse_string("Hello, World.")
        >>> print(result.dump())
        ['Hello,', 'World', '.']

        >>> result = greeting.parse_string("Hello, World!")
        >>> print(result.dump())
        ['Hello,', 'World', '!']
        - enthusiastic: True

    .. versionadded:: 3.1.0
    """

    def __init__(self, tag_name: str, value: Any = True) -> None:
        super().__init__()
        self.mayIndexError = False
        self._may_return_empty = True
        self.leave_whitespace()

        self.tag_name = tag_name
        self.tag_value = value

        # the tag is written into the results by a parse action
        self.add_parse_action(self._add_tag)
        self.show_in_diagram = False

    def _add_tag(self, tokens: ParseResults):
        """Parse action: store the tag value under the tag name."""
        tokens[self.tag_name] = self.tag_value

    def _generateDefaultName(self) -> str:
        kind = type(self).__name__
        return f"{kind}:{self.tag_name}={self.tag_value!r}"

4194

4195

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.

    The contained expressions are kept in the list ``self.exprs``.
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        """Normalize ``exprs`` into the list ``self.exprs``.

        ``exprs`` may be a generator, a single string, a single
        ParserElement, or any iterable; strings are wrapped using
        ``self._literalStringClass``.
        """
        super().__init__(savelist)
        self.exprs: list[ParserElement]
        # materialize generators so they can be inspected more than once
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if any(isinstance(expr, str_type) for expr in exprs):
                exprs = (
                    self._literalStringClass(e) if isinstance(e, str_type) else e
                    for e in exprs
                )
            self.exprs = list(exprs)
        else:
            # last resort: try to iterate, else treat as a single element
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self) -> list[ParserElement]:
        """Return a shallow copy of the list of contained expressions."""
        return self.exprs[:]

    def append(self, other) -> ParserElement:
        """Append ``other`` to the contained expressions and return self."""
        self.exprs.append(other)
        # cached default name no longer reflects the contents
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
        all contained expressions.
        """
        super().leave_whitespace(recursive)

        if recursive:
            # work on copies so the original sub-expressions are not modified
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
        all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if recursive:
            # work on copies so the original sub-expressions are not modified
            self.exprs = [e.copy() for e in self.exprs]
            for e in self.exprs:
                e.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression on this element
        and propagate the most recently registered ignorable to every
        contained expression. A ``Suppress`` that is already present in
        ``self.ignoreExprs`` is not registered again.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super().ignore(other)
                for e in self.exprs:
                    e.ignore(self.ignoreExprs[-1])
        else:
            super().ignore(other)
            for e in self.exprs:
                e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.exprs})"

    def streamline(self) -> ParserElement:
        """Streamline this expression and its contained expressions,
        flattening directly nested expressions of the same class where
        that is safe. Does nothing if already streamlined.
        """
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            # try to absorb a nested expression in the first position
            other = self.exprs[0]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            # then try to absorb a nested expression in the last position
            other = self.exprs[-1]
            if (
                isinstance(other, self.__class__)
                and not other.parseAction
                and other.resultsName is None
                and not other.debug
            ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self._defaultName = None
                self._may_return_empty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = f"Expected {self}"

        return self

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning``, then validates each
        contained expression and runs the recursion check.
        """
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self) -> ParserElement:
        """Return a copy of this expression whose contained expressions
        are themselves copies.
        """
        ret = super().copy()
        ret = typing.cast(ParseExpression, ret)
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the
        ``warn_ungrouped_named_tokens_in_collection`` diagnostic is active
        and not suppressed) if a contained expression already carries a
        results name.
        """
        # diagnostic disabled or suppressed on this element: no checking
        if not (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            return super()._setResultsName(name, list_all_matches)

        for e in self.exprs:
            if (
                isinstance(e, ParserElement)
                and e.resultsName
                and (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                )
            ):
                warning = (
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {e.resultsName!r} on contained expression"
                )
                warnings.warn(warning, stacklevel=3)
                # one warning is enough
                break

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

4367

4368

class And(ParseExpression):
    """
    Requires all given :class:`ParserElement` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example:

    .. testcode::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element: once parseImpl encounters it, failures of the
        # following expressions are raised as ParseSyntaxException.
        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self) -> str:
            return "-"

    def __init__(
        self,
        exprs_arg: typing.Iterable[Union[ParserElement, str]],
        savelist: bool = True,
    ) -> None:
        """Build the sequence from ``exprs_arg``, wrapping strings and
        replacing each ``...`` with a ``SkipTo`` of the following element.

        Raises ``Exception`` if the sequence ends with ``...``.
        """
        # instantiate exprs as a list, converting strs to ParserElements
        exprs: list[ParserElement] = [
            self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg
        ]

        # convert any Ellipsis elements to SkipTo
        if Ellipsis in exprs:

            # Ellipsis cannot be the last element
            if exprs[-1] is Ellipsis:
                raise Exception("cannot construct And with sequence ending in ...")

            tmp: list[ParserElement] = []
            # pair every element (except the last) with its successor
            for cur_expr, next_expr in zip(exprs, exprs[1:]):
                if cur_expr is Ellipsis:
                    tmp.append(SkipTo(next_expr)("_skipped*"))
                else:
                    tmp.append(cur_expr)

            exprs[:-1] = tmp

        super().__init__(exprs, savelist)
        if self.exprs:
            self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling is taken from the first expression,
            # unless that expression is a White
            if not isinstance(self.exprs[0], White):
                self.set_whitespace_chars(
                    self.exprs[0].whiteChars,
                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
                )
                self.skipWhitespace = self.exprs[0].skipWhitespace
            else:
                self.skipWhitespace = False
        else:
            self._may_return_empty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Collapse pending skips, run the base-class streamlining, link
        any ``IndentedBlock`` to the expression preceding it, and
        recompute whether this expression may return empty.
        """
        # collapse any _PendingSkip's
        if self.exprs and any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in self.exprs[:-1]
        ):
            # sentinel placed in slots whose expression has been merged
            # into the preceding _PendingSkip
            deleted_expr_marker = NoMatch()
            for i, e in enumerate(self.exprs[:-1]):
                if e is deleted_expr_marker:
                    continue
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = deleted_expr_marker
            self.exprs = [e for e in self.exprs if e is not deleted_expr_marker]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        # NOTE(review): this method has no early return of its own on
        # self.streamlined, so this loop appears to run on every call and
        # add another parse action each time - confirm this is intended.
        prev: ParserElement
        cur: ParserElement
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while True:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # record the column at which prev matched as the
                    # block's parent_anchor; cur is bound as a default
                    # argument so each lambda keeps its own block
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                next_first = next(iter(subs), None)
                if next_first is None:
                    break
                cur = typing.cast(ParserElement, next_first)

        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Parse every contained expression in order, accumulating their
        tokens. After an ``_ErrorStop`` marker has been passed, parse
        failures are re-raised as ``ParseSyntaxException``.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, do_actions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # if isinstance(e, And._ErrorStop):
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, do_actions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, do_actions)
            resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Implement ``self += other`` by appending ``other`` (strings
        are wrapped first); returns ``NotImplemented`` for other types.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self) -> str:
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the slice picks the first and last characters of ``inner``)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return f"{{{inner}}}"

4537

4538

class Or(ParseExpression):
    """Requires that at least one :class:`ParserElement` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example:

    .. testcode::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints:

    .. testoutput::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if self.exprs:
            # may be empty if any alternative may be; skips whitespace
            # only if every alternative does
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self._may_return_empty = True

    def streamline(self) -> ParserElement:
        """Run the base-class streamlining, then recompute the
        ``_may_return_empty``, ``saveAsList`` and ``skipWhitespace`` flags
        from the contained expressions.
        """
        super().streamline()
        if self.exprs:
            self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs)
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            self.skipWhitespace = all(
                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
            )
        else:
            self.saveAsList = False
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try every alternative at ``loc`` and return the result of the
        one matching the most input.

        Raises the fatal exception with the greatest location if any
        alternative raised ``ParseFatalException`` and none matched;
        otherwise raises the ``ParseException`` with the greatest location.
        """
        maxExcLoc = -1
        maxException = None
        matches: list[tuple[int, ParserElement]] = []
        fatals: list[ParseFatalException] = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        # first pass: find which alternatives match, and how far
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parser_element = e
                fatals.append(pfe)
                # once a fatal has been seen, non-fatal errors are discarded
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not do_actions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, do_actions)

            # best (end location, tokens) seen so far in the second pass
            longest: tuple[int, typing.Optional[ParseResults]] = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, do_actions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # greatest location first; ties are ordered by the length
                # of the failing element's string form, longest first
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # infer from this check that all alternatives failed at the current position
            # so emit this collective error message instead of any single error message
            parse_start_loc = self.preParse(instring, loc)
            if maxExcLoc == parse_start_loc:
                maxException.msg = self.errmsg or ""
            raise maxException

        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Implement ``self ^= other`` by appending ``other`` (strings
        are wrapped first); returns ``NotImplemented`` for other types.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if not isinstance(other, ParserElement):
            return NotImplemented
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self) -> str:
        return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Set the results name, first warning (when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is active
        and not suppressed) if any alternative is an ``And``.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                warning = (
                    "warn_multiple_tokens_in_named_alternation:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    " will return a list of all parsed tokens in an And alternative,"
                    " in prior versions only the first token was returned; enclose"
                    " contained argument in Group"
                )
                warnings.warn(warning, stacklevel=3)

        return super()._setResultsName(name, list_all_matches)

4699

4700

class MatchFirst(ParseExpression):
    """Match the first alternative that succeeds.

    The alternatives are tried in the order given, and the first one that
    parses is used, even if a later one would also match. May be constructed
    using the ``'|'`` operator.

    Example: Construct MatchFirst using '|' operator

    .. doctest::

        # watch the order of expressions to match
        >>> number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        >>> print(number.search_string("123 3.1416 789")) # Fail!
        [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        >>> number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        >>> print(number.search_string("123 3.1416 789")) # Better
        [['123'], ['3.1416'], ['789']]
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = False
    ) -> None:
        super().__init__(exprs, savelist)
        if not self.exprs:
            # with no alternatives, only the may-return-empty flag is set
            self._may_return_empty = True
            return

        self._may_return_empty = any(alt.mayReturnEmpty for alt in self.exprs)
        self.skipWhitespace = all(alt.skipWhitespace for alt in self.exprs)

    def streamline(self) -> ParserElement:
        """Streamline once, then recompute the flags derived from the alternatives."""
        if self.streamlined:
            return self

        super().streamline()
        alternatives = self.exprs
        if not alternatives:
            self.saveAsList = False
            self._may_return_empty = True
            return self

        self.saveAsList = any(alt.saveAsList for alt in alternatives)
        self._may_return_empty = any(alt.mayReturnEmpty for alt in alternatives)
        self.skipWhitespace = all(
            alt.skipWhitespace and not isinstance(alt, White) for alt in alternatives
        )
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` from an alternative is re-raised at once.
        Otherwise the failure with the greatest location is remembered and
        raised after every alternative has failed.
        """
        furthest_loc = -1
        furthest_exc = None

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, do_actions)
            except ParseFatalException as fatal:
                fatal.__traceback__ = None
                fatal.parser_element = alt
                raise
            except ParseException as failure:
                if failure.loc > furthest_loc:
                    furthest_exc = failure
                    furthest_loc = failure.loc
            except IndexError:
                # an IndexError is recorded as a failure at end of input
                end_loc = len(instring)
                if end_loc > furthest_loc:
                    furthest_exc = ParseException(instring, end_loc, alt.errmsg, self)
                    furthest_loc = end_loc

        if furthest_exc is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

        # if the furthest failure is at the (pre-parsed) start position, infer
        # that all alternatives failed right there, and report this
        # expression's collective message instead of an individual one
        if furthest_loc == self.preParse(instring, loc):
            furthest_exc.msg = self.errmsg or ""
        raise furthest_exc

    def __ior__(self, other):
        """Implement ``self |= other`` by appending ``other`` as an alternative."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # MatchFirst([self, other])
        return NotImplemented

    def _generateDefaultName(self) -> str:
        joined = " | ".join(str(alt) for alt in self.exprs)
        return "{" + joined + "}"

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Assign a results name, warning first if an ``And`` alternative is present."""
        diag = Diagnostics.warn_multiple_tokens_in_named_alternation
        diag_active = (
            __diag__.warn_multiple_tokens_in_named_alternation
            and diag not in self.suppress_warnings_
        )
        if diag_active and any(
            isinstance(alt, And) and diag not in alt.suppress_warnings_
            for alt in self.exprs
        ):
            # warn() stays in this frame so stacklevel=3 is unchanged
            warnings.warn(
                "warn_multiple_tokens_in_named_alternation:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " will return a list of all parsed tokens in an And alternative,"
                " in prior versions only the first token was returned; enclose"
                " contained argument in Group",
                stacklevel=3,
            )

        return super()._setResultsName(name, list_all_matches)

4811

4812

class Each(ParseExpression):
    """Match every one of the given :class:`ParserElement` s, in any order.

    Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example:

    .. testcode::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: 'BLACK'
        - posn: ['100', ',', '120']
          - x: '100'
          - y: '120'
        - shape: 'SQUARE'
        ...

        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE',
         'posn:', ['50', ',', '80']]
        - color: 'BLUE'
        - posn: ['50', ',', '80']
          - x: '50'
          - y: '80'
        - shape: 'CIRCLE'
        - size: '50'
        ...

        color:GREEN size:20 shape:TRIANGLE posn:20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE',
         'posn:', ['20', ',', '40']]
        - color: 'GREEN'
        - posn: ['20', ',', '40']
          - x: '20'
          - y: '40'
        - shape: 'TRIANGLE'
        - size: '20'
        ...
    """

    def __init__(
        self, exprs: typing.Iterable[ParserElement], savelist: bool = True
    ) -> None:
        super().__init__(exprs, savelist)
        # all() over no expressions is True, which covers the empty case
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # parseImpl classifies the expressions on first use, then clears this
        self.initExprGroups = True
        self.saveAsList = True

    def __iand__(self, other):
        """Implement ``self &= other`` by appending ``other`` to the expressions."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        if isinstance(other, ParserElement):
            return self.append(other)  # Each([self, other])
        return NotImplemented

    def streamline(self) -> ParserElement:
        """Streamline, then recompute whether this expression may match empty."""
        super().streamline()
        # all() over no expressions is True, which covers the empty case
        self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Find an order in which the expressions match, then parse in that order.

        Candidates are first tried with ``try_parse`` in repeated passes until
        a whole pass fails. The expressions are then parsed for real, in the
        order they matched, and their results are concatenated.

        :raises ParseFatalException: the fatal error with the greatest
            location collected during the final pass, if any
        :raises ParseException: if a required expression never matched
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            opt_wrappers = [e for e in self.exprs if isinstance(e, Opt)]
            self.opt1map = {id(opt.expr): opt for opt in opt_wrappers}
            unwrapped_opts = [opt.expr for opt in opt_wrappers]
            empty_matchers = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = unwrapped_opts + empty_matchers
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        repeatable = self.multioptionals[:]
        match_order: list[ParserElement] = []
        failed: list[ParserElement] = []
        fatals: list[ParseFatalException] = []

        while True:
            candidates = pending_required + pending_optional + repeatable
            failed.clear()
            fatals.clear()
            for candidate in candidates:
                try:
                    scan_loc = candidate.try_parse(
                        instring, scan_loc, raise_fatal=True
                    )
                except ParseFatalException as fatal:
                    fatal.__traceback__ = None
                    fatal.parser_element = candidate
                    fatals.append(fatal)
                    failed.append(candidate)
                except ParseException:
                    failed.append(candidate)
                else:
                    # record the Opt wrapper, if any, rather than its inner expr
                    match_order.append(self.opt1map.get(id(candidate), candidate))
                    if candidate in pending_required:
                        pending_required.remove(candidate)
                    elif candidate in pending_optional:
                        pending_optional.remove(candidate)
            if len(failed) == len(candidates):
                # a full pass with no match at all: stop scanning
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda exc: -exc.loc)
                if fatals[0].loc == fatals[1].loc:
                    # tie on location: prefer the longest element description
                    fatals.sort(
                        key=lambda exc: (-exc.loc, -len(str(exc.parser_element)))
                    )
            raise fatals[0]

        if pending_required:
            missing = ", ".join(str(e) for e in pending_required)
            raise ParseException(
                instring,
                loc,
                f"Missing one or more required elements ({missing})",
            )

        # add any unmatched Opts, in case they have default values defined
        match_order += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        total_results = ParseResults([])
        for matched in match_order:
            loc, results = matched._parse(instring, loc, do_actions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self) -> str:
        joined = " & ".join(str(e) for e in self.exprs)
        return "{" + joined + "}"

4996

4997

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement` that wraps a single
    contained expression (``self.expr``), for combining and post-processing
    parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a plain string to a parser element
            literal_text = typing.cast(str, expr)
            literal_cls = self._literalStringClass
            if issubclass(literal_cls, Token):
                expr = literal_cls(literal_text)  # type: ignore[call-arg]
            elif issubclass(type(self), literal_cls):
                expr = Literal(literal_text)
            else:
                expr = literal_cls(Literal(literal_text))  # type: ignore[assignment, call-arg]
        expr = typing.cast(ParserElement, expr)
        self.expr = expr
        if expr is None:
            return

        # copy parsing attributes from the contained expression
        self.mayIndexError = expr.mayIndexError
        self._may_return_empty = expr.mayReturnEmpty
        self.set_whitespace_chars(
            expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = expr.skipWhitespace
        self.saveAsList = expr.saveAsList
        self.callPreparse = expr.callPreparse
        self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> list[ParserElement]:
        """Return the contained expression as a one-item list, or ``[]`` if unset."""
        return [] if self.expr is None else [self.expr]

    def parseImpl(self, instring, loc, do_actions=True):
        """Delegate parsing to the contained expression.

        ``ParseSyntaxException`` is re-raised untouched. Any other
        ``ParseBaseException`` has its empty ``pstr``/``loc``/``parser_element``
        filled in from this call before it is re-raised.
        """
        if self.expr is None:
            raise ParseException(instring, loc, "No expression defined", self)

        try:
            return self.expr._parse(instring, loc, do_actions, callPreParse=False)
        except ParseSyntaxException:
            raise
        except ParseBaseException as pbe:
            pbe.pstr = pbe.pstr or instring
            pbe.loc = pbe.loc or loc
            pbe.parser_element = pbe.parser_element or self
            # a custom-named non-Forward element with a message of its own
            # replaces the contained expression's message
            if (
                not isinstance(self, Forward)
                and self.customName is not None
                and self.errmsg
            ):
                pbe.msg = self.errmsg
            raise

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        super().leave_whitespace(recursive)

        if recursive and self.expr is not None:
            # apply the change to a copy of the contained expression
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        super().ignore_whitespace(recursive)

        if recursive and self.expr is not None:
            # apply the change to a copy of the contained expression
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        # skip only a Suppress that is already in the ignore list
        already_ignored = isinstance(other, Suppress) and other in self.ignoreExprs
        if not already_ignored:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])

        return self

    def streamline(self) -> ParserElement:
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        extended_trail = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(extended_trail)

    def validate(self, validateTrace=None) -> None:
        """Deprecated; emits a ``DeprecationWarning`` and then runs the old checks."""
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        trace = [] if validateTrace is None else validateTrace
        nested_trace = trace[:] + [self]
        if self.expr is not None:
            self.expr.validate(nested_trace)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        return f"{type(self).__name__}:({self.expr})"

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

5106

5107

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Empty match that succeeds only at exactly column ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that succeeds only at a column greater than ``ref_col``."""

        def __init__(self, ref_col: int) -> None:
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ) -> None:
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; parseImpl overrides it on nested blocks
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Build a parser for this block's indentation column and run it."""
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        self.expr.try_parse(instring, anchor_loc, do_actions=do_actions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        # the repeated lines are wrapped in a Group only when grouped is set
        block_body = Group(block) if self._grouped else block
        full_expr = block_body + Optional(trailing_undent)
        return full_expr.parseImpl(instring, anchor_loc, do_actions)

5170

5171

class AtStringStart(ParseElementEnhance):
    """Matches the given expression only at the very start of the parse
    string (location 0)::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if loc == 0:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at string start")

5191

5192

class AtLineStart(ParseElementEnhance):
    r"""Matches the given expression only where the current location is
    column 1 of a line within the parse string

    Example:

    .. testcode::

        test = '''\
        BBB this line
        BBB and this line
          BBB but not this one
        A BBB and definitely not this one
        '''

        for t in (AtLineStart('BBB') + rest_of_line).search_string(test):
            print(t)

    prints:

    .. testoutput::

        ['BBB', ' this line']
        ['BBB', ' and this line']
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, do_actions)
        raise ParseException(instring, loc, "not found at line start")

5227

5228

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.

    ``FollowedBy`` checks that the given expression matches at the current
    position but does *not* advance the parsing position within the input
    string. It always returns a null token list. If any results names are
    defined in the lookahead expression, those *will* be returned for access
    by name.

    Example:

    .. testcode::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(
            label + Suppress(':')
            + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
        )

        attr_expr[1, ...].parse_string(
            "shape: SQUARE color: BLACK posn: upper left").pprint()

    prints:

    .. testoutput::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        # parse for real, then empty the token list in place: this keeps any
        # named results that were defined in the lookahead expression
        _, lookahead = self.expr._parse(instring, loc, do_actions=do_actions)
        del lookahead[:]

        # the original loc is returned, so no input is consumed
        return loc, lookahead

5271

5272

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.

    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position. ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - ``expr`` - expression that must match prior to the current parse
      location
    - ``retreat`` - (default= ``0``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example:

    .. testcode::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier
    """

    def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None:
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self._may_return_empty = True
        self.mayIndexError = False

        # for these expression kinds the lookbehind distance is taken from
        # the expression itself and any caller-supplied retreat is overridden
        exact = True
        if isinstance(expr, str_type):
            expr = typing.cast(str, expr)
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            exact = False
        self.exact = exact

        self.retreat = retreat
        self.errmsg = f"not preceded by {expr}"
        self.skipWhitespace = False
        # parse action that empties the token list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType:
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg, self)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat specified a maximum lookbehind window, iterate
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        last_exc: ParseBaseException = ParseException(
            instring, loc, self.errmsg, self
        )

        # NOTE(review): when loc > self.retreat the last offset is
        # self.retreat + 1, one more than len(window), which gives a start
        # index of -1 -- confirm whether that extra attempt is intended
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                last_exc = pbe
            else:
                break
        else:
            # no offset matched: raise the last failure recorded
            raise last_exc

        return loc, ret

5353

5354

class Located(ParseElementEnhance):
    """
    Wraps the tokens of the contained expression together with the
    locations in the input string where the match starts and ends.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example:

    .. testcode::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints:

    .. testoutput::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]
    """

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        begin = loc
        end, matched = self.expr._parse(
            instring, begin, do_actions, callPreParse=False
        )
        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end
        if not self.resultsName:
            return end, located
        # must return as a list, so that the name will be attached to the complete group
        return end, [located]

5398

5399

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.

    ``NotAny`` succeeds only when the given expression does *not* match at
    the current position. It does *not* advance the parsing position within
    the input string, does *not* skip over leading whitespace, and always
    returns a null token list. May be constructed using the ``'~'`` operator.

    Example:

    .. testcode::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]) -> None:
        super().__init__(expr)
        # set the flag directly instead of calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self._may_return_empty = True
        self.errmsg = f"Found unwanted token, {self.expr}"

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        unwanted_found = self.expr.can_parse_next(
            instring, loc, do_actions=do_actions
        )
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def _generateDefaultName(self) -> str:
        return "~{" + str(self.expr) + "}"

5443

5444

class _MultipleMatch(ParseElementEnhance):
    """Repeated matching of one expression, with an optional stop-on
    sentinel expression that ends the repetition.
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(expr)
        self.saveAsList = True
        # stopOn() converts a str sentinel to a literal itself
        self.stopOn(stopOn or stop_on)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the sentinel that ends the repetition."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = None if ender is None else ~ender
        return self

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Match the expression once (required), then as many more times as possible."""
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        check_ender = self.not_ender is not None
        if check_ender:
            try_not_ender = self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if check_ender:
            try_not_ender(instring, loc)
        loc, tokens = parse_one(instring, loc, do_actions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if check_ender:
                    try_not_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                # loc is rebound only on success, so a failed attempt leaves
                # it at the end of the last successful match
                loc, more_tokens = parse_one(instring, preloc, do_actions)
                tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        """Assign a results name, warning if a contained expression is already named."""
        diag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and diag not in self.suppress_warnings_
        ):
            for inner in [self.expr] + self.expr.recurse():
                named_and_unsuppressed = (
                    isinstance(inner, ParserElement)
                    and inner.resultsName
                    and diag not in inner.suppress_warnings_
                )
                if not named_and_unsuppressed:
                    continue
                # warn once, for the first offending expression found
                warnings.warn(
                    "warn_ungrouped_named_tokens_in_collection:"
                    f" setting results name {name!r} on {type(self).__name__} expression"
                    f" collides with {inner.resultsName!r} on contained expression",
                    stacklevel=3,
                )
                break

        return super()._setResultsName(name, list_all_matches)

5520

5521

class OneOrMore(_MultipleMatch):
    """
    Match the given expression one or more times.

    Parameters:

    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word).set_parse_action(' '.join))

        >>> text = "shape: SQUARE posn: upper left color: BLACK"

        # Fail! read 'posn' as data instead of next label
        >>> attr_expr[1, ...].parse_string(text).pprint()
        [['shape', 'SQUARE posn']]

        # use stop_on attribute for OneOrMore
        # to avoid reading label string as part of the data
        >>> attr_expr = Group(
        ...     label + Suppress(':')
        ...     + OneOrMore(
        ...         data_word, stop_on=label).set_parse_action(' '.join))
        >>> OneOrMore(attr_expr).parse_string(text).pprint() # Better
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        >>> (attr_expr * (1,)).parse_string(text).pprint()
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
    """

    def _generateDefaultName(self) -> str:
        return "{" + str(self.expr) + "}..."

5565

5566

class ZeroOrMore(_MultipleMatch):
    """
    Match the given expression zero or more times.

    Parameters:

    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        try:
            return super().parseImpl(instring, loc, do_actions)
        except (ParseException, IndexError):
            # zero matches is a success: return empty results at the same loc
            empty_results = ParseResults([], name=self.resultsName)
            return loc, empty_results

    def _generateDefaultName(self) -> str:
        return "[" + str(self.expr) + "]..."

5599

5600

class DelimitedList(ParseElementEnhance):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','.

    By default, the list elements and delimiters can have intervening
    whitespace, and comments, but this can be overridden by passing
    ``combine=True`` in the constructor. If ``combine`` is set to ``True``,
    the matching tokens are returned as a single token string, with the
    delimiters included; otherwise, the matching tokens are returned as a
    list of tokens, with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example:

    .. doctest::

        >>> DelimitedList(Word(alphas)).parse_string("aa,bb,cc")
        ParseResults(['aa', 'bb', 'cc'], {})
        >>> DelimitedList(Word(hexnums), delim=':', combine=True
        ...     ).parse_string("AA:BB:CC:DD:EE")
        ParseResults(['AA:BB:CC:DD:EE'], {})

    .. versionadded:: 3.1.0
    """

    def __init__(
        self,
        expr: Union[str, ParserElement],
        delim: Union[str, ParserElement] = ",",
        combine: bool = False,
        min: typing.Optional[int] = None,
        max: typing.Optional[int] = None,
        *,
        allow_trailing_delim: bool = False,
    ) -> None:
        if isinstance(expr, str_type):
            expr = ParserElement._literalStringClass(expr)
        expr = typing.cast(ParserElement, expr)

        if min is not None:
            if min < 1:
                raise ValueError("min must be greater than 0")
            if max is not None and max < min:
                raise ValueError("max must be greater than, or equal to min")

        self.content = expr
        self.raw_delim = str(delim)
        self.combine = combine
        # delimiters are suppressed from the results unless combining
        self.delim = delim if combine else Suppress(delim)
        self.min = min or 1
        self.max = max
        self.allow_trailing_delim = allow_trailing_delim

        # one content item, then (delim + content) repeated min-1 .. max-1 times
        repeat_bounds = (
            self.min - 1,
            None if self.max is None else self.max - 1,
        )
        list_expr = self.content + (self.delim + self.content) * repeat_bounds
        if self.allow_trailing_delim:
            list_expr += Opt(self.delim)

        if self.combine:
            list_expr = Combine(list_expr)

        super().__init__(list_expr, savelist=True)

    def _generateDefaultName(self) -> str:
        item = self.content.streamline()
        return f"{item} [{self.raw_delim} {item}]..."

5672

5673

class _NullToken:
    """Placeholder object that is falsy and renders as an empty string."""

    def __bool__(self) -> bool:
        # always evaluates as False in boolean context
        return False

    def __str__(self) -> str:
        # contributes nothing when converted to text
        return ""

5680

5681

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    :param expr: expression that may match zero or one time
    :param default: (optional) - value to be returned
        if the optional expression is not found.

    Example:

    .. testcode::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
        98765-
             ^
        ParseException: Expected end of text, found '-' (at char 5), (line:1, col:6)
        FAIL: Expected end of text, found '-' (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default was supplied"; compared by identity
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ) -> None:
        super().__init__(expr, savelist=False)
        # mirror the wrapped expression's list-saving behavior
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self._may_return_empty = True

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Try the wrapped expression; on failure succeed with the default or no tokens."""
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(instring, loc, do_actions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                # no default given: match without producing any tokens
                tokens = []  # type: ignore[assignment]
            elif wrapped.resultsName:
                # expose the default under the wrapped expression's results name
                tokens = ParseResults([fallback])
                tokens[wrapped.resultsName] = fallback
            else:
                tokens = [fallback]  # type: ignore[assignment]
        return loc, tokens

    def _generateDefaultName(self) -> str:
        """Return ``[inner]``, with redundant enclosing braces removed from inner."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return f"[{inner}]"


Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    :param expr: target expression marking the end of the data to be skipped
    :param include: if ``True``, the target expression is also parsed
        (the skipped text and target expression are returned
        as a 2-element list) (default= ``False``).

    :param ignore: (default= ``None``) used to define grammars
        (typically quoted strings and comments)
        that might contain false matches to the target expression

    :param fail_on: (default= ``None``) define expressions that
        are not allowed to be included in the skipped text;
        if found before the target expression is found,
        the :class:`SkipTo` is not a match

    Example:

    .. testcode::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints:

    .. testoutput::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ) -> None:
        super().__init__(other)
        # the legacy camelCase keyword takes precedence when both are given
        failOn = failOn or fail_on
        self.ignoreExpr = ignore
        self._may_return_empty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = f"No match found for {self.expr}"
        # helper expression that carries the ignore expressions used while scanning
        self.ignorer = Empty().leave_whitespace()
        self._update_ignorer()

    def _update_ignorer(self):
        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
        self.ignorer.ignoreExprs.clear()
        for e in self.expr.ignoreExprs:
            self.ignorer.ignore(e)
        if self.ignoreExpr:
            self.ignorer.ignore(self.ignoreExpr)

    def ignore(self, expr) -> ParserElement:
        """Register an ignorable expression and refresh the internal ignorer.

        Returns ``self`` so the call can be chained, as :meth:`Combine.ignore`
        does.
        """
        super().ignore(expr)
        self._update_ignorer()
        return self

    def parseImpl(self, instring, loc, do_actions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the new location and a :class:`ParseResults` holding the
        skipped text (plus the target's tokens when ``include`` was set).
        Raises :class:`ParseException` if the end of the input is reached
        without the target matching.
        """
        startloc = loc
        instrlen = len(instring)
        # bind lookups to locals once, since they are used on every scan step
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # (this skips the loop's ``else`` clause below, so the text
                # scanned so far is what gets returned)
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if ignorer_try_parse is not None:
                # advance past ignore expressions
                prev_tmploc = tmploc
                while 1:
                    try:
                        tmploc = ignorer_try_parse(instring, tmploc)
                    except ParseBaseException:
                        break
                    # see if all ignorers matched, but didn't actually ignore anything
                    if tmploc == prev_tmploc:
                        break
                    prev_tmploc = tmploc

            try:
                self_expr_parse(instring, tmploc, do_actions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real, this time honoring do_actions
            loc, mat = self_expr_parse(instring, loc, do_actions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5924

5925

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    instance using the ``'<<'`` operator.

    .. Note::

       Take care when assigning to ``Forward`` not to overlook
       precedence of operators.

       Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

           fwd_expr << a | b | c

       will actually be evaluated as::

           (fwd_expr << a) | b | c

       thereby leaving b and c out as parseable alternatives.
       It is recommended that you explicitly group the values
       inserted into the :class:`Forward`::

           fwd_expr << (a | b | c)

       Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :meth:`ParseResults.pprint` for an example of a recursive
    parser created using :class:`Forward`.
    """

    def __init__(
        self, other: typing.Optional[Union[ParserElement, str]] = None
    ) -> None:
        # remember where this Forward was created, for the warning in __del__
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)  # type: ignore[arg-type]
        self.lshift_line = None

    def __lshift__(self, other) -> Forward:
        """Attach ``other`` as the contained expression and return ``self``.

        Strings are converted with the literal string class; any other
        non-:class:`ParserElement` operand yields ``NotImplemented``.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)

        if not isinstance(other, ParserElement):
            return NotImplemented

        # Only drop caller_frame once the operand is accepted: after a rejected
        # operand self.expr is still None, and __del__ reads caller_frame then.
        if hasattr(self, "caller_frame"):
            del self.caller_frame

        self.expr = other
        self.streamlined = other.streamlined
        self.mayIndexError = self.expr.mayIndexError
        self._may_return_empty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # remember the calling line so __or__ can detect "fwd << a | b"
        self.lshift_line = traceback.extract_stack(limit=2)[-2]  # type: ignore[assignment]
        return self

    def __ilshift__(self, other) -> Forward:
        if not isinstance(other, ParserElement):
            return NotImplemented

        return self << other

    def __or__(self, other) -> ParserElement:
        caller_line = traceback.extract_stack(limit=2)[-2]
        # '|' evaluated on the same source line as '<<' indicates the
        # precedence mistake described in the class docstring
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "warn_on_match_first_with_lshift_operator:"
                " using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "warn_on_assignment_to_Forward:"
                " Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
        """Parse with the contained expression, using the bounded-recursion
        memo when left recursion is enabled."""
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = (
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            )
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "warn_on_parse_using_empty_Forward:"
                " Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, do_actions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `do_actions=False` and only run `do_actions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, do_actions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `do_actions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if do_actions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if do_actions:
                            # replace the match for do_actions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, copy.copy(prev_result)
                        del memo[peek_key]
                        return prev_loc, copy.copy(prev_peek)
                    # the match did get better: see if we can improve further
                    if do_actions:
                        try:
                            memo[act_key] = super().parseImpl(instring, loc, True)
                        except ParseException as e:
                            memo[peek_key] = memo[act_key] = (new_loc, e)
                            raise
                    prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used here
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        if not self.streamlined:
            # set the flag before descending into the contained expression
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        warnings.warn(
            "ParserElement.validate() is deprecated, and should not be used to check for left recursion",
            DeprecationWarning,
            stacklevel=2,
        )
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self) -> str:
        # Avoid infinite recursion by setting a temporary _defaultName
        save_default_name = self._defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        try:
            if self.expr is not None:
                ret_string = str(self.expr)[:1000]
            else:
                ret_string = "None"
        except Exception:
            ret_string = "..."

        self._defaultName = save_default_name
        return f"{type(self).__name__}: {ret_string}"

    def copy(self) -> ParserElement:
        if self.expr is not None:
            return super().copy()
        else:
            # no expression yet: return a new Forward that refers to this one
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False) -> ParserElement:
        # fmt: off
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_
            and self.expr is None
        ):
            warning = (
                "warn_name_set_on_empty_Forward:"
                f" setting results name {name!r} on {type(self).__name__} expression"
                " that has no contained expression"
            )
            warnings.warn(warning, stacklevel=3)
        # fmt: on

        return super()._setResultsName(name, list_all_matches)

    # Compatibility synonyms
    # fmt: off
    leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace)
    ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace)
    # fmt: on

6196

6197

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None:
        # ``savelist`` is accepted but not forwarded to the base class;
        # saveAsList is always reset to False here
        super().__init__(expr)  # , savelist)
        self.saveAsList = False

6206

6207

class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example:

    .. doctest::

        >>> real = Word(nums) + '.' + Word(nums)
        >>> print(real.parse_string('3.1416'))
        ['3', '.', '1416']

        >>> # will also erroneously match the following
        >>> print(real.parse_string('3. 1416'))
        ['3', '.', '1416']

        >>> real = Combine(Word(nums) + '.' + Word(nums))
        >>> print(real.parse_string('3.1416'))
        ['3.1416']

        >>> # no match when there are internal spaces
        >>> print(real.parse_string('3. 1416'))
        Traceback (most recent call last):
        ParseException: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ) -> None:
        super().__init__(expr)
        # the legacy camelCase keyword wins whenever it is supplied
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        """Add an ignorable expression and return ``self`` for chaining."""
        if not self.adjacent:
            super().ignore(other)
        else:
            # adjacent mode goes straight to ParserElement.ignore,
            # bypassing the intermediate base classes
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one joined string token."""
        # start from an emptied copy of the incoming results
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        needs_wrapping = self.resultsName and combined.haskeys()
        return [combined] if needs_wrapping else combined

6272

6273

class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> ident = Word(alphas)
        >>> num = Word(nums)
        >>> term = ident | num
        >>> func = ident + Opt(DelimitedList(term))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', 'a', 'b', '100']

        >>> func = ident + Group(Opt(DelimitedList(term)))
        >>> print(func.parse_string("fn a, b, 100"))
        ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deep."""
        if not self._asPythonList:
            return [tokenlist]

        # plain-list mode: convert to Python list(s) before wrapping
        if isinstance(tokenlist, ParseResults):
            plain = tokenlist.asList()
        else:
            plain = list(tokenlist)
        return ParseResults.List(plain)

6311

6312

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example:

    .. doctest::

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')

        >>> attr_expr = (
        ...     label + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join)
        ... )

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> # print attributes as plain groups
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...])
        # Dict will auto-assign names.
        >>> result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        # access named fields as dict entries, or output as dict
        >>> print(result['shape'])
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False) -> None:
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Name each sub-result of ``tokenlist`` after its own first token."""
        for offset, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their string form
                key = str(key).strip()

            if entry_len == 1:
                # key with no value
                tokenlist[key] = _ParseResultsWithOffset("", offset)
                continue

            if entry_len == 2 and not isinstance(entry[1], ParseResults):
                # simple key/value pair
                tokenlist[key] = _ParseResultsWithOffset(entry[1], offset)
                continue

            # anything else: the value is everything after the key
            try:
                remainder = entry.copy()  # ParseResults(i)
            except Exception:
                exc = TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                )
                raise exc from None

            del remainder[0]

            # keep the remainder whole unless it is a single, unnamed value
            keep_whole = len(remainder) != 1 or (
                isinstance(remainder, ParseResults) and remainder.haskeys()
            )
            value = remainder if keep_whole else remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, offset)

        if self._asPythonDict:
            as_dict = tokenlist.as_dict
            return [as_dict()] if self.resultsName else as_dict()

        if self.resultsName:
            return [tokenlist]
        return tokenlist

6411

6412

class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example:

    .. doctest::

        >>> source = "a, b, c,d"
        >>> wd = Word(alphas)
        >>> wd_list1 = wd + (',' + wd)[...]
        >>> print(wd_list1.parse_string(source))
        ['a', ',', 'b', ',', 'c', ',', 'd']

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        >>> wd_list2 = wd + (Suppress(',') + wd)[...]
        >>> print(wd_list2.parse_string(source))
        ['a', 'b', 'c', 'd']

        # Skipped text (using '...') can be suppressed as well
        >>> source = "lead in START relevant text END trailing text"
        >>> start_marker = Keyword("START")
        >>> end_marker = Keyword("END")
        >>> find_body = Suppress(...) + start_marker + ... + end_marker
        >>> print(find_body.parse_string(source))
        ['START', 'relevant text ', 'END']

    (See also :class:`DelimitedList`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None:
        # ``Suppress(...)`` holds a pending skip until the next operand is known
        target = _PendingSkip(NoMatch()) if expr is ... else expr
        super().__init__(target)

    def __add__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)

        # resolve the pending skip: suppress everything up to ``other``
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> ParserElement:
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)

        # same resolution as ``__add__``, using the ``-`` operator
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Discard every matched token."""
        return []

    def suppress(self) -> ParserElement:
        """Return ``self``; this expression is already suppressed."""
        return self

6465

6466

6467# XXX: Example needs to be re-done for updated output

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator will print
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
    When the parse action completes, the decorator will print
    ``"<<"`` followed by the returned value, or any exception that the parse action raised.

    Example:

    .. testsetup:: stderr

        import sys
        sys.stderr = sys.stdout

    .. testcleanup:: stderr

        sys.stderr = sys.__stderr__

    .. testcode:: stderr

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints:

    .. testoutput:: stderr
        :options: +NORMALIZE_WHITESPACE

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf',
            0, ParseResults(['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']

    .. versionchanged:: 3.1.0
        Exception type added to output
    """
    f = _trim_arity(f)

    def z(*paArgs):
        # the last three arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        label = f.__name__
        if len(paArgs) > 3:
            # an extra leading argument is treated as the owner; prefix its type name
            label = f"{type(paArgs[0]).__name__}.{label}"
        log = sys.stderr.write
        log(f">>entering {label}(line: {line(l, s)!r}, {l}, {t!r})\n")
        try:
            result = f(*paArgs)
        except Exception as exc:
            log(f"<<leaving {label} (exception: {type(exc).__name__}: {exc})\n")
            raise
        else:
            log(f"<<leaving {label} (ret: {result!r})\n")
            return result

    z.__name__ = f.__name__
    return z

6531

6532

# convenience constants for positional expressions
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

# Grammar for the bracket expressions accepted by srange()

# backslash followed by one punctuation character: keep only that character
_escapedPunc = Regex(r"\\[\\[\]\/\-\*\.\$\+\^\?()~ ]").set_parse_action(
    lambda s, l, t: t[0][1]
)
# \x## or \0x## (x in either case): convert the hex digits to a character.
# The digits are taken as everything after the 'x' marker; str.lstrip() is
# not usable here because it strips a *set* of characters, which also removes
# leading '0' digits ("\x00" -> "") and leaves an upper-case 'X' in place.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# \0## : convert the octal digits to a character
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + Literal("]")
)

6559

6560

def srange(s: str) -> str:
    r"""Helper to easily define string ranges for use in :class:`Word`
    construction. Borrows syntax from regexp ``'[]'`` string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

    - a single character
    - an escaped character with a leading backslash (such as ``\-``
      or ``\]``)
    - an escaped hex character with a leading ``'\x'``
      (``\x21``, which is a ``'!'`` character) (``\0x##``
      is also supported for backwards compatibility)
    - an escaped octal character with a leading ``'\0'``
      (``\041``, which is a ``'!'`` character)
    - a range of any of the above, separated by a dash (``'a-z'``,
      etc.)
    - any combination of the above (``'aeiouy'``,
      ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be parsed or expanded, an empty string is returned.
    """

    def _chars_for(item):
        # a parsed range arrives as a ParseResults pair; anything else is a
        # single character that is passed through untouched
        if not isinstance(item, ParseResults):
            return item
        first, last = ord(item[0]), ord(item[1])
        return "".join(map(chr, range(first, last + 1)))

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join([_chars_for(item) for item in body])
    except Exception:
        # best effort: any failure yields an empty character set
        return ""

6600

6601

def token_map(func, *args) -> ParseAction:
    """Helper to define a parse action by mapping a function to all
    elements of a :class:`ParseResults` list. If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transform_string`::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def pa(s, l, t):
        # apply func (plus any extra positional args) to every token
        converted = []
        for token in t:
            converted.append(func(token, *args))
        return converted

    # name the action after the mapped callable; callables without a
    # __name__ fall back to the name of their class
    try:
        pa.__name__ = func.__name__
    except AttributeError:
        pa.__name__ = func.__class__.__name__

    return pa

6646

6647

def autoname_elements() -> None:
    """
    Name every still-unnamed parser element in the caller's local scope
    after the variable it is bound to.

    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.
    """

    # sys._getframe may not be implemented in the current Python; if it is
    # missing there is no way to inspect the caller, so do nothing
    try:
        frame_getter = sys._getframe
    except AttributeError:
        return

    caller = frame_getter(1)
    if caller is None:
        return

    # walk the caller's locals, looking only at ParserElements
    caller = typing.cast(types.FrameType, caller)
    for var_name, candidate in caller.f_locals.items():
        if not isinstance(candidate, ParserElement):
            continue
        # leave elements alone if a custom name has already been defined
        if candidate.customName:
            continue
        candidate.set_name(var_name)

6666

6667

# Single-line double-quoted string. The regex matches the opening '"' and then
# any run of:
#   - characters other than '"', newline, carriage return, or backslash
#   - a doubled quote ('""')
#   - a backslash escape: a backslash followed by any one character other
#     than 'x', or '\x' followed by one or more hex digits
# The closing '"' is a separate literal; both parts are wrapped in Combine.
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Single-quote counterpart of dbl_quoted_string: same pattern with "'" in
# place of '"' (a doubled "''" is accepted inside the string).
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either kind of quoted string. The two alternatives repeat the regexes used
# for dbl_quoted_string and sgl_quoted_string (built as new expressions here,
# not references to those objects), each with its own short name, and are
# joined with `|` with the double-quoted form listed first.
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("quoted string using single or double quotes")

6684

# XXX: Is there some way to make this show up in API docs?
# .. versionadded:: 3.1.0
#
# Python-style string literal: four alternatives joined with `^`, in this order:
#   1. triple-double-quoted: after the opening '"""', any run of non-quote /
#      non-backslash characters (the negated class also admits newlines), two
#      quotes not followed by a third, one quote not followed by two more, or
#      a backslash followed by one character; then the closing '"""'
#   2. triple-single-quoted: the same shape using "'"
#   3. single-line double-quoted: like dbl_quoted_string, except that an
#      embedded quote is written as a backslash escape (\") rather than doubled
#   4. single-line single-quoted: the same shape using "'"
# NOTE(review): re.MULTILINE only changes how ^ and $ anchor, and neither
# appears in these patterns, so the flag looks like a no-op here - confirm.
# NOTE(review): `\\.` is compiled without re.DOTALL, so a backslash directly
# followed by a newline is not matched inside the triple-quoted forms -
# confirm whether that is intended.
python_quoted_string = Combine(
    (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name(
        "multiline double quoted string"
    )
    ^ (
        Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''"
    ).set_name("multiline single quoted string")
    ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
        "double quoted string"
    )
    ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
        "single quoted string"
    )
).set_name("Python quoted string")

6701

# A quoted string with a leading "u" prefix, combined into one expression.
# A copy of quoted_string is used rather than the shared object itself.
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")


# Character-set strings built with srange, using its backwards-compatible
# ``\0x##`` hex escape form.
# alphas8bit covers code points 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF, i.e. the
# range 0xC0-0xFF minus 0xD7 and 0xF7.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# punc8bit covers code points 0xA1-0xBF plus the two excluded above:
# 0xD7 (multiplication sign) and 0xF7 (division sign).
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

6707

# build list of built-in expressions, for future reference if a global default value
# gets updated
#
# This is a snapshot: it scans the module namespace (vars() at module level)
# at the moment this statement runs and keeps every value that is a
# ParserElement instance. Names bound later in the module are not scanned.
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

6713

# Compatibility synonyms
#
# camelCase names kept alongside the snake_case names above. The expression
# synonyms are plain aliases: each is bound to the same object as its
# snake_case counterpart, not to a copy.
# fmt: off
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
# The function synonyms are created through replaced_by_pep8 (imported from
# .util), which is given the legacy name and the snake_case function;
# presumably it returns a wrapper that forwards to that function - see .util
# to confirm.
nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action)
traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action)
conditionAsParseAction = replaced_by_pep8("conditionAsParseAction", condition_as_parse_action)
tokenMap = replaced_by_pep8("tokenMap", token_map)
# fmt: on