Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/core.py: 44%

2# core.py

4import os

5import typing

6from typing import (

7 NamedTuple,

8 Union,

9 Callable,

10 Any,

11 Generator,

12 Tuple,

13 List,

14 TextIO,

15 Set,

16 Sequence,

17)

18from abc import ABC, abstractmethod

19from enum import Enum

20import string

21import copy

22import warnings

23import re

24import sys

25from collections.abc import Iterable

26import traceback

27import types

28from operator import itemgetter

29from functools import wraps

30from threading import RLock

31from pathlib import Path

33from .util import (

34 _FifoCache,

35 _UnboundedCache,

36 __config_flags,

37 _collapse_string_to_ranges,

38 _escape_regex_range_chars,

39 _bslash,

40 _flatten,

41 LRUMemo as _LRUMemo,

42 UnboundedMemo as _UnboundedMemo,

43)

44from .exceptions import *

45from .actions import *

46from .results import ParseResults, _ParseResultsWithOffset

47from .unicode import pyparsing_unicode

49_MAX_INT = sys.maxsize

50str_type: Tuple[type, ...] = (str, bytes)

52#

54#

55# Permission is hereby granted, free of charge, to any person obtaining

56# a copy of this software and associated documentation files (the

57# "Software"), to deal in the Software without restriction, including

58# without limitation the rights to use, copy, modify, merge, publish,

59# distribute, sublicense, and/or sell copies of the Software, and to

60# permit persons to whom the Software is furnished to do so, subject to

61# the following conditions:

62#

63# The above copyright notice and this permission notice shall be

64# included in all copies or substantial portions of the Software.

65#

66# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

67# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

68# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

69# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

70# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

71# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

72# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

73#

if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """
        Minimal stand-in for ``functools.cached_property`` on Python < 3.8.

        Wraps a single-argument method; the first attribute access on an
        instance calls the method and stores the result in the instance
        ``__dict__`` under the method's name, so later accesses find the stored
        value directly and never reach this (non-data) descriptor again.
        """

        def __init__(self, func):
            self._func = func

        def __get__(self, instance, owner=None):
            # Looked up on the class itself: there is no instance __dict__ to
            # cache into, so hand back the descriptor (as the stdlib one does)
            # instead of failing on ``None.__dict__``.
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret

class __compat__(__config_flags):
    """
    Compatibility switches for pyparsing behaviors that are planned for a
    future release. Setting a switch to True turns the future behavior on in
    the current version, so that client code can be developed and tested
    against it ahead of time.

    - ``collect_all_And_tokens`` - enables the fix for Issue #63, which corrects
      erroneous grouping of results names when an :class:`And` expression is
      nested inside an :class:`Or` or :class:`MatchFirst`; retained for
      compatibility only, since setting it to ``False`` no longer restores the
      pre-2.3.1 behavior
    """

    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every name defined in the class body so far that has no leading underscore
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    _fixed_names = ["collect_all_And_tokens"]

110

111

class __diag__(__config_flags):
    """Holder for the global diagnostic switches; every switch starts out False."""

    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # names defined above without a leading underscore, in definition order
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # partition the switch names by their prefix
    _warning_names = [flag for flag in _all_names if flag.startswith("warn")]
    _debug_names = [flag for flag in _all_names if flag.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls) -> None:
        """Call ``cls.enable`` for each switch whose name starts with ``warn``."""
        for warning_name in cls._warning_names:
            cls.enable(warning_name)

132

133

class Diagnostics(Enum):
    """
    Names of the pyparsing diagnostic settings (all are disabled by default):

    - ``warn_multiple_tokens_in_named_alternation`` - warn when a results name is
      defined on a :class:`MatchFirst` or :class:`Or` expression that has one or
      more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - warn when a results name is
      defined on a containing expression whose ungrouped subexpressions also
      have results names
    - ``warn_name_set_on_empty_Forward`` - warn when a :class:`Forward` is given
      a results name but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - warn when a :class:`Forward` is
      used in a grammar but never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - warn when a :class:`Forward` is
      overwritten by assignment with ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - warn when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - going by its name, a warning
      about combining a :class:`MatchFirst` with the ``'<<'`` operator
    - ``enable_debug_on_named_expressions`` - automatically enable debug on all
      subsequent calls to :class:`ParserElement.set_name`

    Use :class:`enable_diag` and :class:`disable_diag` to switch individual
    diagnostics on and off, or :class:`enable_all_warnings` to turn on every
    warning at once.
    """

    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7

165

166

def enable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn on one global pyparsing diagnostic flag (see :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)

172

173

def disable_diag(diag_enum: Diagnostics) -> None:
    """
    Turn off one global pyparsing diagnostic flag (see :class:`Diagnostics`).
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)

179

180

def enable_all_warnings() -> None:
    """
    Turn on every global pyparsing diagnostic warning (see :class:`Diagnostics`).
    """
    # delegates to the classmethod of the same name on the flag holder
    __diag__.enable_all_warnings()
    return None

186

187

188# hide abstract class

189del __config_flags

190

191

def _should_enable_warnings(
    cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str]
) -> bool:
    """
    Decide whether pyparsing warnings should be switched on.

    The starting answer is the truthiness of ``warn_env_var``. Each entry of
    ``cmd_line_warn_options`` is then split on ``":"`` into
    action/message/category/module fields and may override the answer; the
    last applicable entry wins:

    - an action starting with "i" turns warnings off when the module field is
      empty or "pyparsing"
    - any other action turns warnings on when the module field is "pyparsing",
      or when message, category and module are all empty
    """
    enabled = bool(warn_env_var)
    for option in cmd_line_warn_options:
        # pad with colons so the leading fields always exist, even for a bare action
        fields = (option + "::::").split(":")
        action, message, category, module = fields[:4]
        if action.lower().startswith("i"):
            if module in ("pyparsing", ""):
                enabled = False
        elif module == "pyparsing" or not (message or category or module):
            enabled = True
    return enabled

207

208

# at import time, honor the interpreter's warning options and the
# PYPARSINGENABLEALLWARNINGS environment variable
if _should_enable_warnings(
    cmd_line_warn_options=sys.warnoptions,
    warn_env_var=os.environ.get("PYPARSINGENABLEALLWARNINGS"),
):
    enable_all_warnings()

213

214

# builtins that take a single argument and can be used directly as parse
# actions; _trim_arity calls these with just the parsed tokens
_single_arg_builtins = {
    # reducers
    sum, len, any, all, min, max,
    # re-orderers
    sorted, reversed,
    # container constructors
    list, tuple, set,
}

229

_generatorType = types.GeneratorType

# a parse action may accept anywhere from zero to three positional arguments
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]

# a parse condition has the same call shapes, but returns a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]

# fail action: (instring, loc, expr, exception)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]

# debug callbacks; the trailing bool of each is the cache-hit flag
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool],
    None,
]
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]

249

250

# character sets that are handy when building Word expressions
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
# every printable character that is not whitespace
printables = "".join(c for c in string.printable if c not in string.whitespace)

# lazily filled in by the first call to _trim_arity
_trim_arity_call_line: traceback.StackSummary = None

260

261

def _trim_arity(func, max_limit=3):
    """decorator to trim function calls to match the arity of the target

    The returned wrapper is always called as ``wrapper(s, loc, toks)``. It
    first calls ``func`` with all arguments and, each time the call itself
    raises a TypeError, drops one more leading argument (up to ``max_limit``
    dropped) until a call succeeds. Once a call has succeeded, the discovered
    arity is kept and any later TypeError is re-raised untouched. Members of
    ``_single_arg_builtins`` are simply called with the tokens.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    limit = 0
    found_arity = False

    def innermost_location(tb, depth):
        # (filename, lineno) of the deepest of the first `depth` traceback frames
        return traceback.extract_tb(tb, limit=depth)[-1][:2]

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # fmt: off
    LINE_DIFF = 7
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1])
    pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF)

    def wrapper(*args):
        nonlocal found_arity, limit
        while True:
            try:
                ret = func(*args[limit:])
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                tb = te.__traceback__
                raised_at_call_site = innermost_location(tb, 2) == pa_call_line_synth
                del tb

                if raised_at_call_site and limit < max_limit:
                    limit += 1
                    continue

                raise
    # fmt: on

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    wrapper.__name__ = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__doc__ = func.__doc__

    return wrapper

320

321

def condition_as_parse_action(
    fn: ParseCondition, message: str = None, fatal: bool = False
) -> ParseAction:
    """
    Wrap a simple predicate (a function returning ``True`` or ``False``) so that
    it can be used as a parse action. Useful wherever a parse action is required
    but :class:`ParserElement.add_condition` is not available (for instance when
    attaching a condition to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - custom message for the raised exception
    - ``fatal`` - if True, a failed condition raises :class:`ParseFatalException`
      to stop parsing immediately; otherwise it raises :class:`ParseException`

    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message

    if fatal:
        exc_type = ParseFatalException
    else:
        exc_type = ParseException

    # normalize the predicate so it can always be called as fn(s, l, t)
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return None
        raise exc_type(s, l, msg)

    return pa

348

349

def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Default debug action for the start of a match attempt: prints the
    expression and location, then the current line with a caret under the
    current column. The report is prefixed with "*" when ``cache_hit`` is set.
    """
    marker = "*" if cache_hit else ""
    column = col(loc, instring)
    report_fields = (
        marker,
        expr,
        loc,
        lineno(loc, instring),
        column,
        line(loc, instring),
        " " * (column - 1),
    )
    print("{}Match {} at loc {}({},{})\n {}\n {}^".format(*report_fields))

367

368

def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default debug action for a successful match: prints the expression and the
    matched tokens as a list, prefixed with "*" when ``cache_hit`` is set.
    """
    marker = "*" if cache_hit else ""
    matched = toks.as_list()
    print("{}Matched {} -> {}".format(marker, expr, matched))

379

380

def _default_exception_debug_action(
    instring: str,
    loc: int,
    expr: "ParserElement",
    exc: Exception,
    cache_hit: bool = False,
):
    """
    Default debug action for a failed match: prints the expression together
    with the type name and text of the exception, prefixed with "*" when
    ``cache_hit`` is set.
    """
    marker = "*" if cache_hit else ""
    exc_name = type(exc).__name__
    print("{}Match {} failed, {} raised: {}".format(marker, expr, exc_name, exc))

394

395

def null_debug_action(*args):
    """No-op debug action; accepts any arguments and produces no output."""
    return None

398

399

400class ParserElement(ABC):

401 """Abstract base level parser element class."""

402

403 DEFAULT_WHITE_CHARS: str = " \n\t\r"

404 verbose_stacktrace: bool = False

405 _literalStringClass: typing.Optional[type] = None

406

@staticmethod
def set_default_whitespace_chars(chars: str) -> None:
    r"""
    Replace the set of characters treated as skippable whitespace by default.

    The new value is stored on :class:`ParserElement` and is also applied to
    each expression in this module's ``_builtin_exprs`` that still tracks the
    default whitespace characters.

    Example::

        # out of the box, whitespace is skipped between words
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # make newlines significant by leaving them out of the whitespace chars
        ParserElement.set_default_whitespace_chars(" \t")
        Word(alphas)[1, ...].parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

    # bring the expressions predefined in this module in line with the new default
    for builtin in _builtin_exprs:
        if not builtin.copyDefaultWhiteChars:
            continue
        builtin.whiteChars = set(chars)

427

@staticmethod
def inline_literals_using(cls: type) -> None:
    """
    Choose the class used to wrap string literals that appear inline in a
    parser definition.

    Example::

        # with the default literal class (Literal), the separators are kept
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # switch to Suppress, and the separators are dropped
        ParserElement.inline_literals_using(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
    """
    # stored on the base class
    ParserElement._literalStringClass = cls
    return None

449

class DebugActions(NamedTuple):
    """Triple of optional debug callbacks: on try, on match, and on failure."""

    # called before a match is attempted
    debug_try: typing.Optional[DebugStartAction]
    # called after a successful match
    debug_match: typing.Optional[DebugSuccessAction]
    # called when matching raises
    debug_fail: typing.Optional[DebugExceptionAction]

454

def __init__(self, savelist: bool = False):
    """Set every per-expression setting to its default value."""
    # parse-time callbacks
    self.parseAction: List[ParseAction] = []
    self.failAction: typing.Optional[ParseFailAction] = None

    # naming
    self.customName = None
    self._defaultName = None
    self.resultsName = None
    self.saveAsList = savelist

    # whitespace handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True

    # used when checking for left-recursion
    self.mayReturnEmpty = False
    self.keepTabs = False
    self.ignoreExprs: List["ParserElement"] = []
    self.debug = False
    self.streamlined = False

    # optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.errmsg = ""

    # mark results names as modal (report only last) or cumulative (list all)
    self.modalResults = True

    # custom debug actions
    self.debugActions = self.DebugActions(None, None, None)

    # avoid redundant calls to preParse
    self.callPreparse = True
    self.callDuringTry = False
    self.suppress_warnings_: List[Diagnostics] = []

482

def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
    """
    Record that one particular diagnostic warning should not be emitted for
    this expression. Returns ``self``, so calls can be chained.

    Example::

        base = pp.Forward()
        base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)

        # this statement would normally raise a warning, but it is now suppressed
        print(base.parseString("x"))

    """
    suppressed = self.suppress_warnings_
    suppressed.append(warning_type)
    return self

498

def copy(self) -> "ParserElement":
    """
    Return a copy of this :class:`ParserElement`. Copies make it possible to
    attach different parse actions to the same parsing pattern.

    The copy gets its own parse action and ignore-expression lists. If this
    element tracks the default whitespace characters, the copy's whitespace
    set is rebuilt from the current default.

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    ``expr()`` is an equivalent shorthand for ``expr.copy()``::

        integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    duplicate = copy.copy(self)
    # the shallow copy shares these lists with the original; give the copy its own
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return duplicate

527

def set_results_name(
    self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
) -> "ParserElement":
    """
    Give the matched tokens a name, so they can be accessed as a nested
    attribute of the returned parse results.

    Results names normally behave like dict keys: a later value replaces an
    earlier one. To keep every value captured under a name, call
    ``set_results_name`` with ``list_all_matches`` = True.

    NOTE: ``set_results_name`` returns a *copy* of the original
    :class:`ParserElement` object, so that a basic element (such as an
    integer) can be defined once and then used in several places under
    different names.

    The short form ``expr("name")`` can be used instead of
    ``expr.set_results_name("name")`` - see :class:`__call__`. Use
    ``expr("name*")`` when ``list_all_matches`` is needed.

    Example::

        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    # either spelling of the keyword may request list-all-matches behavior
    collect_all = listAllMatches or list_all_matches
    return self._setResultsName(name, collect_all)

560

def _setResultsName(self, name, listAllMatches=False):
    """
    Return a copy of this element carrying results name ``name``, or ``self``
    unchanged when ``name`` is None. A trailing "*" on the name is stripped
    and turns on list-all-matches behavior.
    """
    if name is None:
        return self

    named_copy = self.copy()
    if name.endswith("*"):
        name, listAllMatches = name[:-1], True
    named_copy.resultsName = name
    named_copy.modalResults = not listAllMatches
    return named_copy

571

def set_break(self, break_flag: bool = True) -> "ParserElement":
    """
    Arrange for the Python pdb debugger to be entered just before this element
    is parsed. Pass ``break_flag`` as ``True`` to enable, or ``False`` to
    restore the original parse method. Returns ``self``.
    """
    if not break_flag:
        # undo a previous set_break(), if there was one
        current = self._parse
        if hasattr(current, "_originalParseMethod"):
            self._parse = current._originalParseMethod
        return self

    _parseMethod = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb

        # this call to pdb.set_trace() is intentional, not a checkin error
        pdb.set_trace()
        return _parseMethod(instring, loc, doActions, callPreParse)

    # remember the wrapped method so that it can be restored later
    breaker._originalParseMethod = _parseMethod
    self._parse = breaker
    return self

594

def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Replace this expression's parse actions with the given callables, which are
    run when the expression matches successfully.

    Parse actions can convert data, perform extra validation, update external
    data structures, or enhance or replace the parsed tokens. Each parse
    action ``fn`` is a callable taking 0-3 arguments, called as
    ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` ,
    where:

    - s = the original string being parsed (see note below)
    - loc = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

    The tokens are passed as ParseResults. They can be modified in place with
    list-style append, extend, and pop operations to change the parsed list
    elements, and with dictionary-style item set and del operations to add,
    update, or remove named results. Tokens modified in place do not need to
    be returned.

    A parse action can also replace the tokens entirely, either with another
    ``ParseResults`` object or with some completely different object (common
    for data conversions). A convenient way to build a new parse result is to
    define the values in a dict and create the return value with
    :class:`ParseResults.from_dict`.

    Passing None as the only ``fn`` clears all parse actions previously added
    to this expression.

    Optional keyword arguments:

    - call_during_try = (default= ``False``) whether the parse action should
      also run during lookaheads and alternate testing. Parse actions with
      side effects should only be called once it is known that they are part
      of a successful parse. Parse actions that perform additional validation
      should pass call_during_try as True, so that the validation takes part
      in the preliminary "try" parses.

    Note: by default, tabs in the input string are expanded before parsing
    starts. See :class:`parse_string` for more information on parsing strings
    containing ``<TAB>`` s, and for suggested ways to keep a consistent view
    of the parsed string, the parse location, and line and column positions
    within the parsed string.

    Example::

        # parse dates in the form YYYY/MM/DD

        # use parse action to convert toks from str to int at parse time
        def convert_to_int(toks):
            return int(toks[0])

        # use a parse action to verify that the date is a valid date
        def is_valid_date(instring, loc, toks):
            from datetime import date
            year, month, day = toks[::2]
            try:
                date(year, month, day)
            except ValueError:
                raise ParseException(instring, loc, "invalid date given")

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        # add parse actions
        integer.set_parse_action(convert_to_int)
        date_str.set_parse_action(is_valid_date)

        # note that integer fields are now ints, not strings
        date_str.run_tests('''
            # successful parse - note that integer fields were converted to ints
            1999/12/31

            # fail - invalid date
            1999/13/31
            ''')
    """
    # a lone None means "clear all parse actions"
    if fns == (None,):
        self.parseAction = []
        return self

    for fn in fns:
        if not callable(fn):
            raise TypeError("parse actions must be callable")

    self.parseAction = list(map(_trim_arity, fns))
    # the pre-PEP8 keyword spelling is still accepted as a fallback
    legacy_flag = kwargs.get("callDuringTry", False)
    self.callDuringTry = kwargs.get("call_during_try", legacy_flag)
    return self

680

def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
    """
    Append one or more parse actions to those already attached to this
    expression. See :class:`set_parse_action` for the accepted signatures and
    keyword arguments.

    See examples in :class:`copy`.
    """
    new_actions = list(map(_trim_arity, fns))
    self.parseAction += new_actions
    # once enabled, call-during-try stays enabled
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get(
            "call_during_try", kwargs.get("callDuringTry", False)
        )
    return self

692

def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
    """Append boolean predicate functions to this expression's parse actions. See
    :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
    functions passed to ``add_condition`` must return the boolean success/fail of the
    condition.

    Optional keyword arguments:

    - message = custom message to be used in the raised exception
    - fatal = if True, raises ParseFatalException to stop parsing immediately;
      otherwise raises ParseException
    - call_during_try = whether this method should also be called during internal
      tryParse calls, default=False

    Example::

        integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
                                                                     (line:1, col:1)
    """
    message = kwargs.get("message")
    fatal = kwargs.get("fatal", False)
    for predicate in fns:
        action = condition_as_parse_action(predicate, message=message, fatal=fatal)
        self.parseAction.append(action)

    # once enabled, call-during-try stays enabled
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get(
            "call_during_try", kwargs.get("callDuringTry", False)
        )
    return self

727

def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
    """
    Set the action to perform when parsing fails at this expression.
    The fail action ``fn`` is a callable taking the arguments
    ``fn(s, loc, expr, err)`` where:

    - s = string being parsed
    - loc = location where expression match was attempted and failed
    - expr = the parse expression that failed
    - err = the exception thrown

    The function returns no value. It may raise :class:`ParseFatalException`
    when parsing should stop immediately."""
    self.failAction = fn
    return self

743

def _skipIgnorables(self, instring, loc):
    """
    Advance ``loc`` past every run of ignorable expressions found at the
    current position, and return the new location. The ignore expressions are
    retried in rounds until a full round matches none of them.
    """
    matched_any = True
    while matched_any:
        matched_any = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches of this expression as possible
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    matched_any = True
            except ParseException:
                pass
    return loc

756

def preParse(self, instring, loc):
    """
    Return the location at which matching should start: ``loc`` moved past any
    ignorable expressions and then, unless whitespace skipping is turned off,
    past any leading whitespace characters.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    end = len(instring)
    skippable = self.whiteChars
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc

768

def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: consume nothing and return ``loc`` with no tokens."""
    no_tokens = []
    return loc, no_tokens

771

def postParse(self, instring, loc, tokenlist):
    """Base implementation: hand the token list back unchanged."""
    result = tokenlist
    return result

774

775 # @profile

def _parseNoCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Match this expression against ``instring`` at ``loc``, without using the
    packrat cache.

    Steps: optionally run ``preParse``, call ``parseImpl`` (an IndexError from
    it becomes a ParseException located at the end of the string), run
    ``postParse``, wrap the tokens in a :class:`ParseResults`, then run the
    parse actions when ``doActions`` or ``self.callDuringTry`` is set. The
    debug and fail actions are invoked along the way when they are configured.

    Returns ``(loc, tokens)`` where ``loc`` is the location after the match.
    Exceptions raised while matching or by parse actions propagate to the
    caller; an IndexError raised by a parse action is re-raised as a
    ParseException chained to it.
    """
    debugging = self.debug  # and doActions)
    len_instring = len(instring)

    if debugging or self.failAction:
        # Bind tokens_start before the try block: the handler below reports it,
        # and preParse itself may raise before the skipped location is known
        # (previously that case ended in an UnboundLocalError that masked the
        # real error).
        tokens_start = loc
        try:
            if callPreParse and self.callPreparse:
                pre_loc = self.preParse(instring, loc)
            else:
                pre_loc = loc
            tokens_start = pre_loc
            if self.debugActions.debug_try:
                self.debugActions.debug_try(instring, tokens_start, self, False)
            if self.mayIndexError or pre_loc >= len_instring:
                try:
                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
                except IndexError:
                    raise ParseException(instring, len_instring, self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
        except Exception as err:
            if self.debugActions.debug_fail:
                self.debugActions.debug_fail(
                    instring, tokens_start, self, err, False
                )
            if self.failAction:
                self.failAction(instring, tokens_start, self, err)
            raise
    else:
        # same matching steps as above, without the debug/fail bookkeeping
        if callPreParse and self.callPreparse:
            pre_loc = self.preParse(instring, loc)
        else:
            pre_loc = loc
        tokens_start = pre_loc
        if self.mayIndexError or pre_loc >= len_instring:
            try:
                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
            except IndexError:
                raise ParseException(instring, len_instring, self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, pre_loc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    ret_tokens = ParseResults(
        tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
    )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokens_start, ret_tokens)
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        raise exc from parse_action_exc

                    # a parse action that returns a new object replaces the results
                    if tokens is not None and tokens is not ret_tokens:
                        ret_tokens = ParseResults(
                            tokens,
                            self.resultsName,
                            asList=self.saveAsList
                            and isinstance(tokens, (ParseResults, list)),
                            modal=self.modalResults,
                        )
            except Exception as err:
                if self.debugActions.debug_fail:
                    self.debugActions.debug_fail(
                        instring, tokens_start, self, err, False
                    )
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokens_start, ret_tokens)
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    raise exc from parse_action_exc

                # a parse action that returns a new object replaces the results
                if tokens is not None and tokens is not ret_tokens:
                    ret_tokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList
                        and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults,
                    )
    if debugging:
        if self.debugActions.debug_match:
            self.debugActions.debug_match(
                instring, tokens_start, loc, self, ret_tokens, False
            )

    return loc, ret_tokens

877

def try_parse(self, instring: str, loc: int, raise_fatal: bool = False) -> int:
    """
    Attempt a match at ``loc`` without running parse actions, and return the
    location after the match. A :class:`ParseFatalException` is converted to a
    plain :class:`ParseException` unless ``raise_fatal`` is set.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
    except ParseFatalException:
        if raise_fatal:
            raise
        raise ParseException(instring, loc, self.errmsg, self)
    return outcome[0]

885

def can_parse_next(self, instring: str, loc: int) -> bool:
    """
    Report whether this expression would match at ``loc``: False when the
    trial parse raises ParseException or IndexError, True otherwise.
    """
    try:
        self.try_parse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True

893

894 # cache for left-recursion in Forward references

895 recursion_lock = RLock()

896 recursion_memos: typing.Dict[

897 Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]

898 ] = {}

899

900 # argument cache for optimizing repeated calls when backtracking through recursive expressions

901 packrat_cache = (

902 {}

903 ) # this is set later by enabled_packrat(); this is here so that reset_cache() doesn't fail

904 packrat_cache_lock = RLock()

905 packrat_cache_stats = [0, 0]

906

907 # this method gets repeatedly called during backtracking with the same arguments -

908 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

def _parseCache(
    self, instring, loc, doActions=True, callPreParse=True
) -> Tuple[int, ParseResults]:
    """
    Packrat-caching front end for ``_parseNoCache``.

    Results are cached under the key ``(self, instring, loc, callPreParse,
    doActions)``. A successful parse is stored as ``(end_loc, tokens_copy,
    start_loc)``; a failed parse is stored as a fresh exception built from the
    original's class and args (so without its traceback). On a cache hit the
    stored exception is raised again, or ``(end_loc, copy_of_tokens)`` is
    returned. The hit/miss counters in ``packrat_cache_stats`` are updated.

    Debug actions are also called on cache hits, with ``cache_hit=True``. A
    TypeError from such a call is ignored, presumably to tolerate debug
    actions that do not accept the ``cache_hit`` keyword.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        value = cache.get(lookup)
        if value is cache.not_in_cache:
            ParserElement.packrat_cache_stats[MISS] += 1
            try:
                value = self._parseNoCache(instring, loc, doActions, callPreParse)
            except ParseBaseException as pe:
                # cache a copy of the exception, without the traceback
                cache.set(lookup, pe.__class__(*pe.args))
                raise
            else:
                # stored as (end location, tokens, start location)
                cache.set(lookup, (value[0], value[1].copy(), loc))
                return value
        else:
            ParserElement.packrat_cache_stats[HIT] += 1
            if self.debug and self.debugActions.debug_try:
                try:
                    self.debugActions.debug_try(instring, loc, self, cache_hit=True)
                except TypeError:
                    pass
            if isinstance(value, Exception):
                if self.debug and self.debugActions.debug_fail:
                    try:
                        self.debugActions.debug_fail(
                            instring, loc, self, value, cache_hit=True
                        )
                    except TypeError:
                        pass
                raise value

            end_loc, result, start_loc = value[0], value[1].copy(), value[2]
            if self.debug and self.debugActions.debug_match:
                try:
                    # The success action takes (instring, startloc, endloc, ...);
                    # these two locations used to be passed in the opposite order.
                    # start_loc is the location this lookup was requested at, so
                    # it precedes any skipped whitespace.
                    self.debugActions.debug_match(
                        instring, start_loc, end_loc, self, result, cache_hit=True
                    )
                except TypeError:
                    pass

            return end_loc, result

956

# default parse entry point is the uncached implementation; enable_packrat() rebinds
# ParserElement._parse to _parseCache and disable_memoization() restores this binding
_parse = _parseNoCache

958

@staticmethod
def reset_cache() -> None:
    """
    Empty the packrat cache, zero its statistics counters in place, and clear
    the left-recursion memo table.
    """
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so the same list object is kept
    stats[:] = [0] * len(stats)
    ParserElement.recursion_memos.clear()

966

# memoization mode flags: both start off; enable_packrat() / enable_left_recursion()
# set them and disable_memoization() clears them again
_packratEnabled = False
_left_recursion_enabled = False

969

@staticmethod
def disable_memoization() -> None:
    """
    Turn off any active Packrat or Left Recursion parsing and discard their
    memoized state.

    It is harmless to call this when neither Packrat nor Left Recursion is
    enabled, so it can safely be called before activating either one in order
    to clear any previous settings.
    """
    ParserElement.reset_cache()
    ParserElement._parse = ParserElement._parseNoCache
    ParserElement._packratEnabled = False
    ParserElement._left_recursion_enabled = False

983

@staticmethod
def enable_left_recursion(
    cache_size_limit: typing.Optional[int] = None, *, force=False
) -> None:
    """
    Enables "bounded recursion" parsing, which allows for both direct and indirect
    left-recursion. During parsing, left-recursive :class:`Forward` elements are
    repeatedly matched with a fixed recursion depth that is gradually increased
    until finding the longest match.

    Example::

        import pyparsing as pp
        pp.ParserElement.enable_left_recursion()

        E = pp.Forward("E")
        num = pp.Word(pp.nums)
        # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
        E <<= E + '+' - num | num

        print(E.parse_string("1+2+3"))

    Recursion search naturally memoizes matches of ``Forward`` elements and may
    thus skip reevaluation of parse actions during backtracking. This may break
    programs with parse actions which rely on strict ordering of side-effects.

    Parameters:

    - cache_size_limit - (default=``None``) - memoize at most this many
      ``Forward`` elements during matching; if ``None`` (the default),
      memoize all ``Forward`` elements. Values of zero or less are rejected
      with ``NotImplementedError``.
    - force - (default=``False``) - if true, first call
      ``disable_memoization()`` instead of raising ``RuntimeError`` when
      Packrat parsing is already enabled.

    Bounded Recursion parsing works similar but not identical to Packrat parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._packratEnabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    if cache_size_limit is None:
        memo = _UnboundedMemo()
    elif cache_size_limit > 0:
        memo = _LRUMemo(capacity=cache_size_limit)
    else:
        raise NotImplementedError("Memo size of %s" % cache_size_limit)

    ParserElement.recursion_memos = memo
    ParserElement._left_recursion_enabled = True

1031

@staticmethod
def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None:
    """
    Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code. Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:

    - cache_size_limit - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.
    - force - (default= ``False``) - if true, first call
      ``disable_memoization()`` instead of raising ``RuntimeError`` when
      Bounded Recursion parsing is already enabled.

    This speedup may break existing programs that use parse actions that
    have side-effects. For this reason, packrat parsing is disabled when
    you first import pyparsing. To activate the packrat feature, your
    program must call the class method :class:`ParserElement.enable_packrat`.
    For best results, call ``enable_packrat()`` immediately after
    importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enable_packrat()

    Packrat parsing works similar but not identical to Bounded Recursion parsing,
    thus the two cannot be used together. Use ``force=True`` to disable any
    previous, conflicting settings.
    """
    if force:
        ParserElement.disable_memoization()
    elif ParserElement._left_recursion_enabled:
        raise RuntimeError("Packrat and Bounded Recursion are not compatible")

    # already active: leave the existing cache (and its size) untouched
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    ParserElement.packrat_cache = (
        _UnboundedCache() if cache_size_limit is None else _FifoCache(cache_size_limit)
    )
    ParserElement._parse = ParserElement._parseCache

1075

def parse_string(
    self, instring: str, parse_all: bool = False, *, parseAll: bool = False
) -> ParseResults:
    """
    Parse a string with respect to the parser definition. This function is intended as the primary interface to the
    client code.

    :param instring: The input string to be parsed.
    :param parse_all: If set, the entire input string must match the grammar.
    :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
    :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
    :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
      an object with attributes if the given parser includes results names.

    If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This
    is also equivalent to ending the grammar with :class:`StringEnd`().

    To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
    converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
    contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
    being parsed, one can ensure a consistent view of the input string by doing one of the following:

    - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
    - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
      parse action's ``s`` argument, or
    - explicitly expand the tabs in your input string before calling ``parse_string``.

    Examples:

    By default, partial matches are OK.

    >>> res = Word('a').parse_string('aaaaabaaa')
    >>> print(res)
    ['aaaaa']

    The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
    directly to see more examples.

    It raises an exception if parse_all flag is set and instring does not match the whole grammar.

    >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
    Traceback (most recent call last):
    ...
    pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6)
    """
    # either spelling of the flag turns on whole-string matching
    must_match_all = parse_all or parseAll

    ParserElement.reset_cache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if must_match_all:
            # skip trailing whitespace/ignorables, then require end of input
            loc = self.preParse(instring, loc)
            end_check = Empty() + StringEnd()
            end_check._parse(instring, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        raise exc.with_traceback(None)
    return tokens

1144

def scan_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    overlap: bool = False,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> Generator[Tuple[ParseResults, int, int], None, None]:
    """
    Scan the input string for expression matches. Each match will return the
    matching tokens, start location, and end location. May be called with optional
    ``max_matches`` argument, to clip scanning after 'n' matches are found. If
    ``overlap`` is specified, then overlapping matches will be reported.

    :param instring: the string to scan
    :param max_matches: stop after this many matches
    :param overlap: if true, continue scanning from inside the previous match
    :param debug: if true, print a dict of tokens/start/end for every match
    :param maxMatches: pre-PEP8 spelling of ``max_matches``; the smaller of the two is used
    :returns: generator of ``(tokens, start, end)`` tuples

    Note that the start and end locations are reported relative to the string
    being parsed. See :class:`parse_string` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scan_string(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    maxMatches = min(maxMatches, max_matches)
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = str(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every loop iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.reset_cache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn(instring, loc)
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character past the pre-parsed position
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    if debug:
                        print(
                            {
                                "tokens": tokens.as_list(),
                                "start": preloc,
                                "end": nextLoc,
                            }
                        )
                    yield tokens, preloc, nextLoc
                    if overlap:
                        skipped_loc = preparseFn(instring, loc)
                        if skipped_loc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # zero-width match: force progress so the loop terminates
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)

1233

def transform_string(self, instring: str, *, debug: bool = False) -> str:
    """
    Extension to :class:`scan_string`, to modify matching text with modified tokens that may
    be returned from a parse action. To use ``transform_string``, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking ``transform_string()`` on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action. ``transform_string()`` returns the resulting transformed string.

    Note that this sets ``keepTabs`` to ``True`` on this expression and does not reset it.

    Example::

        wd = Word(alphas)
        wd.set_parse_action(lambda toks: toks[0].title())

        print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces: List[str] = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transform_string and scan_string
    self.keepTabs = True
    try:
        for toks, start, end in self.scan_string(instring, debug=debug):
            # unmatched text between the previous match and this one is copied through
            pieces.append(instring[prev_end:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces += toks.as_list()
                elif isinstance(toks, Iterable) and not isinstance(toks, str_type):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(str(part) for part in _flatten(non_empty))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1279

def search_string(
    self,
    instring: str,
    max_matches: int = _MAX_INT,
    *,
    debug: bool = False,
    maxMatches: int = _MAX_INT,
) -> ParseResults:
    """
    Another extension to :class:`scan_string`, simplifying the access to the tokens found
    to match the given parse expression. May be called with optional
    ``max_matches`` argument, to clip searching after 'n' matches are found
    (``maxMatches`` is the pre-PEP8 spelling; the smaller of the two is used).

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    limit = min(maxMatches, max_matches)
    try:
        found = [
            toks for toks, _start, _end in self.scan_string(instring, limit, debug=debug)
        ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc.with_traceback(None)

1319

def split(
    self,
    instring: str,
    maxsplit: int = _MAX_INT,
    include_separators: bool = False,
    *,
    includeSeparators=False,
) -> Generator[str, None, None]:
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional ``maxsplit`` argument, to limit the number of splits;
    and the optional ``include_separators`` argument (default= ``False``), if the separating
    matching text should be included in the split results
    (``includeSeparators`` is the pre-PEP8 spelling of the same flag).

    Example::

        punc = one_of(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    emit_separators = includeSeparators or include_separators
    cursor = 0
    for toks, start, end in self.scan_string(instring, max_matches=maxsplit):
        yield instring[cursor:start]
        if emit_separators:
            yield toks[0]
        cursor = end
    # whatever follows the final separator (possibly the empty string)
    yield instring[cursor:]

1351

def __add__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement`
    converts them to :class:`Literal`s by default.

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print(hello, "->", greet.parse_string(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.

        Literal('start') + ... + Literal('end')

    is equivalent to:

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.

    :raises TypeError: if ``other`` is neither a string, ``...``, nor a :class:`ParserElement`
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    operand = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(operand, ParserElement):
        return And([self, operand])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(operand).__name__
        )
    )

1391

def __radd__(self, other) -> "ParserElement":
    """
    Implementation of ``+`` operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` becomes ``SkipTo(expr)("_skipped*") + expr``; a string left
    operand is first converted with ``_literalStringClass``.

    :raises TypeError: if ``other`` cannot be converted to a :class:`ParserElement`
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left + self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(left).__name__
        )
    )

1408

def __sub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator, returns :class:`And` with error stop
    (an ``And._ErrorStop()`` is inserted between the two operands).

    :raises TypeError: if ``other`` is neither a string nor a :class:`ParserElement`
    """
    right = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(right, ParserElement):
        return self + And._ErrorStop() + right
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(right).__name__
        )
    )

1422

def __rsub__(self, other) -> "ParserElement":
    """
    Implementation of ``-`` operator when left operand is not a :class:`ParserElement`.

    :raises TypeError: if ``other`` is neither a string nor a :class:`ParserElement`
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left - self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(left).__name__
        )
    )

1436

def __mul__(self, other) -> "ParserElement":
    """
    Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples
    may also include ``None`` as in:

    - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
    - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` may be used anywhere ``None`` is accepted.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences. If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    :raises TypeError: if ``other`` is not an int or a tuple of ints/``None``/``...``
    :raises ValueError: for a negative count or a maximum below the minimum
    """
    # normalize the Ellipsis spellings to their tuple equivalents
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        other = ((0,) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        required, optional = other, 0
    elif isinstance(other, tuple):
        bounds = tuple(None if item is Ellipsis else item for item in other)
        low, high = (bounds + (None, None))[:2]
        if low is None:
            low = 0
        if isinstance(low, int) and high is None:
            # open-ended repetition
            if low == 0:
                return ZeroOrMore(self)
            if low == 1:
                return OneOrMore(self)
            return self * low + ZeroOrMore(self)
        if isinstance(low, int) and isinstance(high, int):
            required, optional = low, high - low
        else:
            raise TypeError(
                "cannot multiply ParserElement and ({}) objects".format(
                    ",".join(type(item).__name__ for item in (low, high))
                )
            )
    else:
        raise TypeError(
            "cannot multiply ParserElement and {} objects".format(
                type(other).__name__
            )
        )

    if required < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optional < 0:
        raise ValueError(
            "second tuple value must be greater or equal to first tuple value"
        )
    if required == optional == 0:
        return And([])

    # mandatory prefix: the expression itself, or an And of ``required`` copies
    if required == 1:
        head = self
    elif required > 1:
        head = And([self] * required)
    else:
        head = None
    if not optional:
        return head

    # optional suffix, nested as Opt(self + Opt(self + ... Opt(self)))
    tail = Opt(self)
    for _ in range(optional - 1):
        tail = Opt(self + tail)
    return tail if head is None else head + tail

1522

def __rmul__(self, other) -> "ParserElement":
    """Implementation of ``*`` with the multiplier on the left; delegates to ``__mul__``."""
    repeated = self.__mul__(other)
    return repeated

1525

def __or__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a ``_PendingSkip`` created with ``must_skip=True``.

    :raises TypeError: if ``other`` is neither a string, ``...``, nor a :class:`ParserElement`
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    alternative = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(alternative, ParserElement):
        return MatchFirst([self, alternative])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(alternative).__name__
        )
    )

1542

def __ror__(self, other) -> "ParserElement":
    """
    Implementation of ``|`` operator when left operand is not a :class:`ParserElement`.

    :raises TypeError: if ``other`` is neither a string nor a :class:`ParserElement`
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left | self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(left).__name__
        )
    )

1556

def __xor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator - returns :class:`Or`.

    :raises TypeError: if ``other`` is neither a string nor a :class:`ParserElement`
    """
    alternative = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(alternative, ParserElement):
        return Or([self, alternative])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(alternative).__name__
        )
    )

1570

def __rxor__(self, other) -> "ParserElement":
    """
    Implementation of ``^`` operator when left operand is not a :class:`ParserElement`.

    :raises TypeError: if ``other`` is neither a string nor a :class:`ParserElement`
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left ^ self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(left).__name__
        )
    )

1584

def __and__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator - returns :class:`Each`.

    :raises TypeError: if ``other`` is neither a string nor a :class:`ParserElement`
    """
    partner = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(partner, ParserElement):
        return Each([self, partner])
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(partner).__name__
        )
    )

1598

def __rand__(self, other) -> "ParserElement":
    """
    Implementation of ``&`` operator when left operand is not a :class:`ParserElement`.

    :raises TypeError: if ``other`` is neither a string nor a :class:`ParserElement`
    """
    left = self._literalStringClass(other) if isinstance(other, str_type) else other
    if isinstance(left, ParserElement):
        return left & self
    raise TypeError(
        "Cannot combine element of type {} with ParserElement".format(
            type(left).__name__
        )
    )

1612

def __invert__(self) -> "ParserElement":
    """
    Implementation of ``~`` operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

1618

# disable __iter__ to override legacy use of sequential access to __getitem__ to
# iterate over a sequence; with __iter__ set to None, iter(expr) raises TypeError
# instead of falling back to repeated __getitem__ calls
__iter__ = None

1622

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:

    - ``expr[n]`` is equivalent to ``expr*n``
    - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
    - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
      to ``expr*n + ZeroOrMore(expr)``
      (read as "at least n instances of ``expr``")
    - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
      (read as "0 to n instances of ``expr``")
    - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
    - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``

    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``s exist in the input stream. If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    :raises TypeError: if more than two index arguments are given
    """
    # convert single arg keys to tuples: strings are wrapped as a 1-tuple, and
    # anything that is not iterable is doubled into ``(key, key)``
    try:
        if isinstance(key, str_type):
            key = (key,)
        iter(key)
    except TypeError:
        key = (key, key)

    count = len(key)
    if count > 2:
        overflow_note = "... [{}]".format(count) if count > 5 else ""
        raise TypeError(
            "only 1 or 2 index arguments supported ({}{})".format(
                key[:5], overflow_note
            )
        )

    # clip to 2 elements and delegate to the ``*`` operator
    return self * tuple(key[:2])

1662

def __call__(self, name: typing.Optional[str] = None) -> "ParserElement":
    """
    Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")

    :param name: results name to assign, or ``None`` to get a plain copy
    :returns: the value returned by ``_setResultsName(name)``, or by ``copy()`` when ``name`` is ``None``
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)

1682

def suppress(self) -> "ParserElement":
    """
    Wrap this :class:`ParserElement` in :class:`Suppress` so that its output is
    dropped; useful to keep punctuation from cluttering up returned output.
    """
    suppressed = Suppress(self)
    return suppressed

1689

def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Enables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern.

    :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any);
      this implementation does not itself read the flag
    :returns: this element, to allow call chaining
    """
    setattr(self, "skipWhitespace", True)
    return self

1699

def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
    """
    Disables the skipping of whitespace before matching the characters in the
    :class:`ParserElement`'s defined pattern. This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    :param recursive: If true (the default), also disable whitespace skipping in child elements (if any);
      this implementation does not itself read the flag
    :returns: this element, to allow call chaining
    """
    setattr(self, "skipWhitespace", False)
    return self

1710

def set_whitespace_chars(
    self, chars: Union[Set[str], str], copy_defaults: bool = False
) -> "ParserElement":
    """
    Overrides the default whitespace chars.

    Stores ``set(chars)`` as this element's ``whiteChars``, records
    ``copy_defaults`` in ``copyDefaultWhiteChars``, and turns whitespace
    skipping on.

    :returns: this element, to allow call chaining
    """
    self.whiteChars = set(chars)
    self.copyDefaultWhiteChars = copy_defaults
    self.skipWhitespace = True
    return self

1721

def parse_with_tabs(self) -> "ParserElement":
    """
    Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string.
    Must be called before ``parse_string`` when the input grammar contains elements that
    match ``<TAB>`` characters.

    :returns: this element (with ``keepTabs`` set), to allow call chaining
    """
    setattr(self, "keepTabs", True)
    return self

1730

def ignore(self, other: "ParserElement") -> "ParserElement":
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in :class:`Suppress`. A :class:`Suppress`
    instance is appended only if it is not already in ``ignoreExprs``; any
    other expression is copied, wrapped in :class:`Suppress`, and appended.

    Example::

        patt = Word(alphas)[1, ...]
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj']

        patt.ignore(c_style_comment)
        patt.parse_string('ablaj /* comment */ lskjd')
        # -> ['ablaj', 'lskjd']

    :returns: this element, to allow call chaining
    """
    if isinstance(other, str_type):
        other = Suppress(other)

    if isinstance(other, Suppress):
        # avoid registering the same suppressed expression twice
        if other not in self.ignoreExprs:
            self.ignoreExprs.append(other)
    else:
        self.ignoreExprs.append(Suppress(other.copy()))
    return self

1758

def set_debug_actions(
    self,
    start_action: DebugStartAction,
    success_action: DebugSuccessAction,
    exception_action: DebugExceptionAction,
) -> "ParserElement":
    """
    Customize display of debugging messages while doing pattern matching:

    - ``start_action`` - method to be called when an expression is about to be parsed;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``

    - ``success_action`` - method to be called when an expression has successfully parsed;
      should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserElement, parsed_tokens: ParseResults, cache_hit: bool)``

    - ``exception_action`` - method to be called when expression fails to parse;
      should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``

    Any action passed as a falsy value is replaced by the corresponding default
    debug action. Calling this method also turns debugging on for this element.

    :returns: this element, to allow call chaining
    """
    on_start = start_action or _default_start_debug_action
    on_success = success_action or _default_success_debug_action
    on_exception = exception_action or _default_exception_debug_action
    self.debugActions = self.DebugActions(on_start, on_success, on_exception)
    self.debug = True
    return self

1784

def set_debug(self, flag: bool = True) -> "ParserElement":
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to ``True`` to enable, ``False`` to disable.

    Example::

        wd = Word(alphas).set_name("alphaword")
        integer = Word(nums).set_name("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.set_debug()

        term[1, ...].parse_string("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`set_debug_actions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
    """
    if not flag:
        self.debug = False
        return self

    # enabling always installs the default debug actions (which also sets ``debug``)
    self.set_debug_actions(
        _default_start_debug_action,
        _default_success_debug_action,
        _default_exception_debug_action,
    )
    return self

1831

@property
def default_name(self) -> str:
    """
    Auto-generated name for this expression.

    The value is produced by ``_generateDefaultName()`` the first time it is
    needed and cached in ``_defaultName`` for later accesses.
    """
    cached = self._defaultName
    if cached is None:
        cached = self._defaultName = self._generateDefaultName()
    return cached

1837

@abstractmethod
def _generateDefaultName(self):
    """
    Child classes must define this method, which defines how the ``default_name`` is set.

    The ``default_name`` property calls this when no name has been cached yet
    and stores the returned value in ``_defaultName``.
    """

1843

def set_name(self, name: str) -> "ParserElement":
    """
    Give this expression a custom name, which makes debugging and exception
    messages clearer. The error message is rebuilt from the new name.

    Example::

        Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
        Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)

    Returns ``self`` so that calls can be chained.
    """
    self.customName = name
    # ``self.name`` now resolves to the custom name (unless ``name`` is None).
    self.errmsg = "Expected " + self.name
    if not __diag__.enable_debug_on_named_expressions:
        return self
    # Diagnostic switch is on: every named expression gets debugging enabled.
    self.set_debug()
    return self

1856

@property
def name(self) -> str:
    """
    Display name of this expression: the user-defined ``customName`` when one
    has been set, otherwise the auto-generated ``default_name``.
    """
    custom = self.customName
    if custom is None:
        return self.default_name
    return custom

1861

def __str__(self) -> str:
    """Return this expression's ``name`` (custom name if set, else the default name)."""
    return self.name

1864

def __repr__(self) -> str:
    """Return the same text as ``str(self)``."""
    return str(self)

1867

def streamline(self) -> "ParserElement":
    """
    Mark this element as streamlined and drop its cached default name, so
    that ``default_name`` regenerates it on the next access.

    Returns ``self`` so that calls can be chained.
    """
    self._defaultName = None
    self.streamlined = True
    return self

1872

def recurse(self) -> Sequence["ParserElement"]:
    """
    Return the sub-expressions contained in this element.

    This implementation returns a new empty list; ``_checkRecursion``
    iterates over the returned sequence to visit each sub-expression.
    """
    return []

1875

def _checkRecursion(self, parseElementList):
    """
    Walk the sub-expressions returned by ``recurse()``, passing each one the
    list of elements visited so far with this element appended.

    ``parseElementList`` itself is not modified; a new list is built for the
    recursive calls.
    """
    # list + list builds a fresh list, so the caller's list stays untouched
    visited_path = parseElementList + [self]
    for sub_expr in self.recurse():
        sub_expr._checkRecursion(visited_path)

1880

def validate(self, validateTrace=None) -> None:
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    ``validateTrace`` is accepted but not used by this implementation; the
    check is delegated to ``_checkRecursion``, started with an empty list.
    """
    self._checkRecursion([])

1886

def parse_file(
    self,
    file_or_filename: Union[str, Path, TextIO],
    encoding: str = "utf-8",
    parse_all: bool = False,
    *,
    parseAll: bool = False,
) -> ParseResults:
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Parameters:
    - ``file_or_filename`` - an object with a ``read()`` method, or a path to open
    - ``encoding`` - encoding used when a path has to be opened (default= ``"utf-8"``)
    - ``parse_all`` / ``parseAll`` - either spelling, if true, is passed on to
      ``parse_string`` as its second argument

    Returns the value returned by ``parse_string``; a ``ParseBaseException``
    raised by the parse is propagated to the caller.
    """
    must_parse_all = parseAll or parse_all

    # Duck-typing: anything with .read() is treated as an open file; otherwise
    # the argument is taken to be a path and opened here.
    try:
        text = file_or_filename.read()
    except AttributeError:
        with open(file_or_filename, "r", encoding=encoding) as source:
            text = source.read()

    try:
        return self.parse_string(text, must_parse_all)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc.with_traceback(None)
        raise

1914

def __eq__(self, other):
    """
    Compare this expression with ``other``:

    - the same object compares equal;
    - a ``str_type`` instance compares equal when this expression matches it
      in full (``matches(other, parse_all=True)``);
    - another ``ParserElement`` compares equal when both have equal
      instance attribute dictionaries;
    - anything else compares unequal.
    """
    if self is other:
        return True
    if isinstance(other, str_type):
        return self.matches(other, parse_all=True)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

1923

def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    return id(self)

1926

def matches(
    self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
) -> bool:
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
    - ``test_string`` - to test against this expression for a match
      (converted with ``str()`` before parsing)
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests;
      the ``parseAll`` spelling is also accepted, and the flag passed on is
      true only if both are true

    Returns ``True`` if parsing succeeds, ``False`` if a
    ``ParseBaseException`` is raised.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    require_full_match = parseAll and parse_all
    try:
        self.parse_string(str(test_string), parse_all=require_full_match)
    except ParseBaseException:
        return False
    else:
        return True

1949

def run_tests(
    self,
    tests: Union[str, List[str]],
    parse_all: bool = True,
    comment: typing.Optional[Union["ParserElement", str]] = "#",
    full_dump: bool = True,
    print_results: bool = True,
    failure_tests: bool = False,
    post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None,
    file: typing.Optional[TextIO] = None,
    with_line_numbers: bool = False,
    *,
    parseAll: bool = True,
    fullDump: bool = True,
    printResults: bool = True,
    failureTests: bool = False,
    postParse: typing.Optional[Callable[[str, ParseResults], str]] = None,
) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
    - ``tests`` - a list of separate test strings, or a multiline string of test strings
    - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
    - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
      string; pass None to disable comment filtering
    - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
      if False, only dump nested list
    - ``print_results`` - (default= ``True``) prints test output to stdout
    - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
    - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
      `fn(test_string, parse_results)` and returns a string to be added to the test output
    - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
      if None, will default to ``sys.stdout``
    - ``with_line_numbers`` - (default= ``False``) show test strings with line and column numbers

    Each snake_case flag also has a keyword-only camelCase spelling
    (``parseAll``, ``fullDump``, ``printResults``, ``failureTests``, ``postParse``).

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failure_tests`` is True), and results is a list of
    ``(test_string, result)`` tuples, one per executed test, where ``result`` is
    either the parsed results or the exception that was raised

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.run_tests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.run_tests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failure_tests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading ``'r'``.)
    """
    from .testing import pyparsing_test

    # Reconcile the snake_case arguments with their camelCase spellings.
    # Flags defaulting to True are combined with ``and``, the others with ``or``.
    parseAll = parseAll and parse_all
    fullDump = fullDump and full_dump
    printResults = printResults and print_results
    failureTests = failureTests or failure_tests
    postParse = postParse or post_parse

    if isinstance(tests, str_type):
        # A single multiline string: one test per line, each line stripped.
        line_strip = type(tests).strip
        tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, str_type):
        comment = Literal(comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    result: Union[ParseResults, Exception]
    allResults = []
    comments = []
    success = True
    # Matches a literal backslash-n in the test text (outside quoted strings)
    # and replaces it with a real newline.
    NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
    BOM = "\ufeff"
    for t in tests:
        # Collect comment lines (and blank lines that follow a comment) so
        # they can be emitted just ahead of the next real test.
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(
                pyparsing_test.with_line_numbers(t) if with_line_numbers else t
            )
            continue
        if not t:
            continue
        out = [
            "\n" + "\n".join(comments) if comments else "",
            pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
        ]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transform_string(t.lstrip(BOM))
            result = self.parse_string(t, parse_all=parseAll)
        except ParseBaseException as pe:
            out.append(pe.explain())
            out.append("FAIL: " + str(pe))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(pe.__traceback__))
            # A failure keeps the run successful only when failures are expected.
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: {}: {}".format(type(exc).__name__, exc))
            if ParserElement.verbose_stacktrace:
                out.extend(traceback.format_tb(exc.__traceback__))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        # NOTE: this dump (and the one for a ParseResults
                        # callback value above) does not apply ``fullDump``.
                        out.append(result.dump())
                except Exception as e:
                    # A failing callback does not fail the test; report it
                    # after the normal dump.
                    out.append(result.dump(full=fullDump))
                    out.append(
                        "{} failed: {}: {}".format(
                            postParse.__name__, type(e).__name__, e
                        )
                    )
            else:
                out.append(result.dump(full=fullDump))
        out.append("")

        if printResults:
            print_("\n".join(out))

        allResults.append((t, result))

    return success, allResults

2144

def create_diagram(
    self,
    output_html: Union[TextIO, Path, str],
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
    **kwargs,
) -> None:
    """
    Create a railroad diagram for the parser.

    Parameters:
    - output_html (str, Path or file-like object) - output target for generated
      diagram HTML
    - vertical (int) - threshold for formatting multiple alternatives vertically
      instead of horizontally (default=3)
    - show_results_names - bool flag whether diagram should show annotations for
      defined results names
    - show_groups - bool flag whether groups should be highlighted with an unlabeled surrounding box
    Additional diagram-formatting keyword arguments can also be included;
    see railroad.Diagram class.

    Raises ``Exception`` if the diagram support module cannot be imported.
    """

    try:
        from .diagram import to_railroad, railroad_to_html
    except ImportError as ie:
        raise Exception(
            "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
        ) from ie

    self.streamline()

    diagram = to_railroad(
        self,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
        diagram_kwargs=kwargs,
    )

    if not isinstance(output_html, (str, Path)):
        # we were passed a file-like object, just write to it
        output_html.write(railroad_to_html(diagram))
        return

    # A path was given: create or overwrite the file as UTF-8 text.
    with open(output_html, "w", encoding="utf-8") as html_file:
        html_file.write(railroad_to_html(diagram))

2190

# camelCase aliases: each name is bound to the same object as its snake_case
# counterpart, so either spelling can be used.
# NOTE(review): presumably retained for backward compatibility with older
# camelCase method names - confirm before removing any of them.
setDefaultWhitespaceChars = set_default_whitespace_chars
inlineLiteralsUsing = inline_literals_using
setResultsName = set_results_name
setBreak = set_break
setParseAction = set_parse_action
addParseAction = add_parse_action
addCondition = add_condition
setFailAction = set_fail_action
tryParse = try_parse
canParseNext = can_parse_next
resetCache = reset_cache
enableLeftRecursion = enable_left_recursion
enablePackrat = enable_packrat
parseString = parse_string
scanString = scan_string
searchString = search_string
transformString = transform_string
setWhitespaceChars = set_whitespace_chars
parseWithTabs = parse_with_tabs
setDebugActions = set_debug_actions
setDebug = set_debug
defaultName = default_name
setName = set_name
parseFile = parse_file
runTests = run_tests
ignoreWhitespace = ignore_whitespace
leaveWhitespace = leave_whitespace

2218

2219

class _PendingSkip(ParserElement):
    # Internal placeholder that holds the spot where '...' was added to a
    # parser element; once another ParserElement is added, this placeholder
    # is replaced with a SkipTo.
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        self.must_skip = must_skip
        self.anchor = expr

    def _generateDefaultName(self):
        # Render the anchor followed by "..." by borrowing the name of
        # ``anchor + Empty()`` and substituting the "Empty" text.
        combined_name = str(self.anchor + Empty())
        return combined_name.replace("Empty", "...")

    def __add__(self, other) -> "ParserElement":
        # Build the expression that replaces this placeholder now that the
        # skip target ``other`` is known.
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # Nothing (or only an empty string) was skipped: remove the
            # first token and the "_skipped" results name.
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # The anchor did not match; when the last skipped entry is
            # empty, record which anchor was missing instead.
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = "missing <" + repr(self.anchor) + ">"

        anchored = self.anchor + skipper().add_parse_action(must_skip)
        unanchored = skipper().add_parse_action(show_skip)
        return (anchored | unanchored) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # A placeholder that was never combined with a following expression
        # cannot be parsed.
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )

2259

2260

class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self):
        # The default name of a token is simply its class name.
        cls = type(self)
        return cls.__name__

2271

2272

class Empty(Token):
    """
    An empty token, will always match.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

2282

2283

class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # Unconditionally fail at the current location.
        raise ParseException(instring, loc, self.errmsg, self)

2297

2298

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        # The camelCase keyword spelling wins when it is non-empty.
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        try:
            self.firstMatchChar = text[0]
        except IndexError:
            raise ValueError("null string passed to Literal; use Empty() instead")
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

        # Performance tuning: modify __class__ to select
        # a parseImpl optimized for single-character check
        if type(self) is Literal and self.matchLen == 1:
            self.__class__ = _SingleCharLiteral

    def _generateDefaultName(self):
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # Compare the first character before the full startswith() test.
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2342

2343

class _SingleCharLiteral(Literal):
    # Variant of Literal whose parseImpl only compares a single character;
    # Literal.__init__ switches one-character Literal instances to this class.
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2349

2350

# Register Literal as ParserElement's ``_literalStringClass``. This is done
# here because Literal is only defined at this point in the module.
# NOTE(review): presumably the class used to wrap plain strings that are
# combined with parser expressions - confirm against ParserElement.
ParserElement._literalStringClass = Literal

2352

2353

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    it must be immediately followed by a non-keyword character. Compare
    with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``identChars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        super().__init__()
        # camelCase keyword spellings take precedence when truthy
        keyword_chars = identChars or ident_chars
        if keyword_chars is None:
            keyword_chars = Keyword.DEFAULT_KEYWORD_CHARS
        text = matchString or match_string
        self.match = text
        self.matchLen = len(text)
        try:
            self.firstMatchChar = text[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = "Expected {} {}".format(type(self).__name__, self.name)
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparison is done in upper case on both sides.
            self.caselessmatch = text.upper()
            keyword_chars = keyword_chars.upper()
        self.identChars = set(keyword_chars)

    def _generateDefaultName(self):
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        errmsg = self.errmsg
        errloc = loc
        kw_len = self.matchLen
        ident = self.identChars

        if self.caseless:
            if instring[loc : loc + kw_len].upper() == self.caselessmatch:
                if loc != 0 and instring[loc - 1].upper() in ident:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
                elif (
                    loc >= len(instring) - kw_len
                    or instring[loc + kw_len].upper() not in ident
                ):
                    return loc + kw_len, self.match
                else:
                    # followed by keyword char
                    # (wording differs from the case-sensitive branch below)
                    errmsg += ", was immediately followed by keyword character"
                    errloc = loc + kw_len
            # else no match just raise plain exception

        else:
            text_matches = (
                instring[loc] == self.firstMatchChar and kw_len == 1
            ) or instring.startswith(self.match, loc)
            if text_matches:
                if loc != 0 and instring[loc - 1] in ident:
                    # preceded by keyword char
                    errmsg += ", keyword was immediately preceded by keyword character"
                    errloc = loc - 1
                elif (
                    loc >= len(instring) - kw_len
                    or instring[loc + kw_len] not in ident
                ):
                    return loc + kw_len, self.match
                else:
                    # followed by keyword char
                    errmsg += (
                        ", keyword was immediately followed by keyword character"
                    )
                    errloc = loc + kw_len
            # else no match just raise plain exception

        raise ParseException(instring, errloc, errmsg, self)

    @staticmethod
    def set_default_keyword_chars(chars) -> None:
        """
        Overrides the default characters used by :class:`Keyword` expressions.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    setDefaultKeywordChars = set_default_keyword_chars

2470

2471

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        original_text = matchString or match_string
        # The base class stores the upper-cased form for comparison.
        super().__init__(original_text.upper())
        # Preserve the defining literal.
        self.returnString = original_text
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        candidate = instring[loc : loc + self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString

2497

2498

class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: typing.Optional[str] = None,
        *,
        matchString: str = "",
        identChars: typing.Optional[str] = None,
    ):
        # camelCase keyword spellings take precedence when truthy
        keyword_chars = identChars or ident_chars
        text = matchString or match_string
        super().__init__(text, keyword_chars, caseless=True)

2522

2523

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: int = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        # An explicit ``max_mismatches`` overrides the camelCase keyword.
        if max_mismatches is not None:
            maxMismatches = max_mismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected {!r} (with up to {} mismatches)".format(
            self.match_string, self.maxMismatches
        )
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self):
        return "{}:{!r}".format(type(self).__name__, self.match_string)

    def parseImpl(self, instring, loc, doActions=True):
        begin = loc
        target = self.match_string
        end = begin + len(target)

        # Only attempt a comparison when enough input remains.
        if end <= len(instring):
            allowed = self.maxMismatches
            ignore_case = self.caseless
            mismatches = []
            position = 0

            for position, (src, mat) in enumerate(zip(instring[loc:end], target)):
                if ignore_case:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(position)
                    if len(mismatches) > allowed:
                        # too many differences - fall through to the raise
                        break
            else:
                # Loop ran to completion: this is an acceptable match.
                loc = begin + position + 1
                results = ParseResults([instring[begin:loc]])
                results["original"] = target
                results["mismatches"] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2611

2612

2613class Word(Token):

2614 """Token for matching words composed of allowed character sets.

2615 Parameters:

2616 - ``init_chars`` - string of all characters that should be used to

2617 match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;

2618 if ``body_chars`` is also specified, then this is the string of

2619 initial characters

2620 - ``body_chars`` - string of characters that

2621 can be used for matching after a matched initial character as

2622 given in ``init_chars``; if omitted, same as the initial characters

2623 (default=``None``)

2624 - ``min`` - minimum number of characters to match (default=1)

2625 - ``max`` - maximum number of characters to match (default=0)

2626 - ``exact`` - exact number of characters to match (default=0)

2627 - ``as_keyword`` - match as a keyword (default=``False``)

2628 - ``exclude_chars`` - characters that might be

2629 found in the input ``body_chars`` string but which should not be

2630 accepted for matching ;useful to define a word of all

2631 printables except for one or two characters, for instance

2632 (default=``None``)

2633

2634 :class:`srange` is useful for defining custom character set strings

2635 for defining :class:`Word` expressions, using range notation from

2636 regular expression character sets.

2637

2638 A common mistake is to use :class:`Word` to match a specific literal

2639 string, as in ``Word("Address")``. Remember that :class:`Word`

2640 uses the string argument to define *sets* of matchable characters.

2641 This expression would match "Add", "AAA", "dAred", or any other word

2642 made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an

2643 exact literal string, use :class:`Literal` or :class:`Keyword`.

2644

2645 pyparsing includes helper strings for building Words:

2646

2647 - :class:`alphas`

2648 - :class:`nums`

2649 - :class:`alphanums`

2650 - :class:`hexnums`

2651 - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255

2652 - accented, tilded, umlauted, etc.)

2653 - :class:`punc8bit` (non-alphabetic characters in ASCII range

2654 128-255 - currency, symbols, superscripts, diacriticals, etc.)

2655 - :class:`printables` (any non-whitespace character)

2656

2657 ``alphas``, ``nums``, and ``printables`` are also defined in several

2658 Unicode sets - see :class:`pyparsing_unicode``.

2659

2660 Example::

2661

2662 # a word composed of digits

2663 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

2664

2665 # a word with a leading capital, and zero or more lowercase

2666 capital_word = Word(alphas.upper(), alphas.lower())

2667

2668 # hostnames are alphanumeric, with leading alpha, and '-'

2669 hostname = Word(alphas, alphanums + '-')

2670

2671 # roman numeral (not a strict parser, accepts invalid mix of characters)

2672 roman = Word("IVXLCDM")

2673

2674 # any string of non-whitespace characters, except for ','

2675 csv_value = Word(printables, exclude_chars=",")

2676 """

2677

def __init__(
    self,
    init_chars: str = "",
    body_chars: typing.Optional[str] = None,
    min: int = 1,
    max: int = 0,
    exact: int = 0,
    as_keyword: bool = False,
    exclude_chars: typing.Optional[str] = None,
    *,
    initChars: typing.Optional[str] = None,
    bodyChars: typing.Optional[str] = None,
    asKeyword: bool = False,
    excludeChars: typing.Optional[str] = None,
):
    """Set up the character sets, length limits and optional regex for a Word.

    Parameters:

    - ``init_chars`` - characters allowed in the first position (required)
    - ``body_chars`` - characters allowed after the first position; when
      empty or ``None`` the initial characters are used for the body as well
    - ``min`` - minimum word length; must be at least 1
    - ``max`` - maximum word length; 0 (or less) means no maximum
    - ``exact`` - when greater than 0, overrides both ``min`` and ``max``
    - ``as_keyword`` - require that the word is not adjacent to other
      body characters
    - ``exclude_chars`` - characters removed from both character sets

    The camelCase keyword-only arguments are synonyms for the snake_case
    ones; a truthy camelCase value takes precedence.

    Raises ``ValueError`` if no initial characters are given or if
    ``min`` is less than 1.
    """
    initChars = initChars or init_chars
    bodyChars = bodyChars or body_chars
    asKeyword = asKeyword or as_keyword
    excludeChars = excludeChars or exclude_chars
    super().__init__()
    if not initChars:
        raise ValueError(
            "invalid {}, initChars cannot be empty string".format(
                type(self).__name__
            )
        )

    initChars = set(initChars)
    # self.initChars aliases the local set, so the in-place subtraction of
    # excluded characters below is reflected in the attribute as well
    self.initChars = initChars
    if excludeChars:
        excludeChars = set(excludeChars)
        initChars -= excludeChars
        if bodyChars:
            bodyChars = set(bodyChars) - excludeChars
    self.initCharsOrig = "".join(sorted(initChars))

    if bodyChars:
        self.bodyCharsOrig = "".join(sorted(bodyChars))
        self.bodyChars = set(bodyChars)
    else:
        # no (remaining) body characters: body uses the initial characters
        self.bodyCharsOrig = "".join(sorted(initChars))
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError(
            "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
        )

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # see if we can make a regex for this Word
    if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0):
        if self.bodyChars == self.initChars:
            # one character class repeated for the whole word
            if max == 0:
                repeat = "+"
            elif max == 1:
                repeat = ""
            else:
                repeat = "{{{},{}}}".format(
                    self.minLen, "" if self.maxLen == _MAX_INT else self.maxLen
                )
            self.reString = "[{}]{}".format(
                _collapse_string_to_ranges(self.initChars),
                repeat,
            )
        elif len(self.initChars) == 1:
            # a single literal leading character followed by body characters
            if max == 0:
                repeat = "*"
            else:
                repeat = "{{0,{}}}".format(max - 1)
            self.reString = "{}[{}]{}".format(
                re.escape(self.initCharsOrig),
                _collapse_string_to_ranges(self.bodyChars),
                repeat,
            )
        else:
            # leading character class followed by body characters
            if max == 0:
                repeat = "*"
            elif max == 2:
                # the body character is optional: min is 1 here, so a
                # one-character word must still match (an empty repeat
                # would demand exactly two characters)
                repeat = "?"
            else:
                repeat = "{{0,{}}}".format(max - 1)
            self.reString = "[{}][{}]{}".format(
                _collapse_string_to_ranges(self.initChars),
                _collapse_string_to_ranges(self.bodyChars),
                repeat,
            )
        if self.asKeyword:
            self.reString = r"\b" + self.reString + r"\b"

        try:
            self.re = re.compile(self.reString)
        except re.error:
            # fall back to the character-by-character parseImpl
            self.re = None
        else:
            # switch this instance over to the regex-based implementation
            self.re_match = self.re.match
            self.__class__ = _WordRegex

2790

def _generateDefaultName(self):
    """Build the default display name from the character sets and length limits."""

    def abbreviate(chars):
        # collapse to ranges and truncate anything longer than 16 characters
        limit = 16
        collapsed = _collapse_string_to_ranges(chars, re_escape=False)
        if len(collapsed) <= limit:
            return collapsed
        return collapsed[: limit - 3] + "..."

    if self.initChars == self.bodyChars:
        label = "W:({})".format(abbreviate(self.initChars))
    else:
        label = "W:({}, {})".format(
            abbreviate(self.initChars), abbreviate(self.bodyChars)
        )

    shortest, longest = self.minLen, self.maxLen

    # no length restriction to report
    if shortest <= 1 and longest == _MAX_INT:
        return label

    # add length specification
    if shortest == longest:
        if shortest == 1:
            # exactly one character: drop the leading "W:" marker
            return label[2:]
        return label + "{{{}}}".format(shortest)
    if longest == _MAX_INT:
        return label + "{{{},...}}".format(shortest)
    return label + "{{{},{}}}".format(shortest, longest)

2819

def parseImpl(self, instring, loc, doActions=True):
    """Match a word at ``loc`` one character at a time.

    Returns ``(new_loc, matched_text)``; raises ``ParseException`` when the
    first character is not an initial character, the match is shorter than
    ``minLen``, a specified maximum is followed by another body character,
    or (in keyword mode) the match is adjacent to a body character.
    """
    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    total = len(instring)
    body = self.bodyChars
    # never scan past the maximum length or the end of the input
    limit = min(begin + self.maxLen, total)

    loc += 1
    while loc < limit and instring[loc] in body:
        loc += 1

    if loc - begin < self.minLen:
        failed = True
    elif self.maxSpecified and loc < total and instring[loc] in body:
        failed = True
    elif self.asKeyword:
        touches_before = begin > 0 and instring[begin - 1] in body
        failed = touches_before or (loc < total and instring[loc] in body)
    else:
        failed = False

    if failed:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]

2851

2852

class _WordRegex(Word):
    """Word variant that matches using the precompiled ``re_match`` callable."""

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(end, text)`` of the regex match at ``loc`` or raise ``ParseException``."""
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)

2861

2862

class Char(_WordRegex):
    """Shorthand for :class:`Word` ``(characters, exact=1)``: matches exactly
    one character taken from the given string of characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: typing.Optional[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: typing.Optional[str] = None,
    ):
        # camelCase synonyms win when truthy
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, asKeyword=keyword_mode, excludeChars=excluded
        )

        # always build a single-character class regex for this element
        pattern = "[{}]".format(_collapse_string_to_ranges(self.initChars))
        if keyword_mode:
            pattern = r"\b{}\b".format(pattern)
        self.reString = pattern
        self.re = re.compile(pattern)
        self.re_match = self.re.match

2888

2889

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already-compiled expression (any object
        with ``pattern`` and ``match`` attributes), which is used directly.
        ``as_group_list`` makes the element return the match's groups and
        ``as_match`` makes it return the match object itself; when both are
        set, ``as_match`` wins.

        Raises ``ValueError`` for an empty pattern string and ``TypeError``
        for any other kind of ``pattern`` argument.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            # compilation is deferred until the ``re`` property is first read
            self._re = None
            self.reString = self.pattern = pattern
            self.flags = flags

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            self._re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # pick the parse implementation once, up front
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    @cached_property
    def re(self):
        """Compiled expression; compiled from ``pattern``/``flags`` on first
        access unless a precompiled object was supplied.

        Raises ``ValueError`` if the pattern string does not compile.
        """
        if self._re:
            return self._re
        try:
            return re.compile(self.pattern, self.flags)
        except re.error as err:
            # keep the underlying regex error attached as the explicit cause
            raise ValueError(
                "invalid pattern ({!r}) passed to Regex".format(self.pattern)
            ) from err

    @cached_property
    def re_match(self):
        """Bound ``match`` method of the compiled expression."""
        return self.re.match

    @cached_property
    def mayReturnEmpty(self):
        """True when the expression matches the empty string."""
        return self.re_match("") is not None

    def _generateDefaultName(self):
        return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

    def parseImpl(self, instring, loc, doActions=True):
        """Match at ``loc`` and return the matched text, with any named
        groups added as named results."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Match at ``loc`` and return the tuple of the match's groups."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Match at ``loc`` and return the match object itself."""
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def sub(self, repl: str) -> ParserElement:
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when used with ``asGroupList=True``, or with a
        callable ``repl`` together with ``asMatch=True``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(asGroupList=True)")

        if self.asMatch and callable(repl):
            raise TypeError("cannot use sub() with a callable with Regex(asMatch=True)")

        if self.asMatch:

            def pa(tokens):
                # the token is a match object: expand the template against it
                return tokens[0].expand(repl)

        else:

            def pa(tokens):
                # the token is the matched text: re-run the substitution on it
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)

3043

3044

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

    - ``quote_char`` - string of one or more characters defining the
      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
    - ``multiline`` - boolean indicating whether quotes can span
      multiple lines (default= ``False``)
    - ``unquote_results`` - boolean indicating whether the matched text
      should be unquoted (default= ``True``)
    - ``end_quote_char`` - string of one or more characters defining the
      end of the quote delimited string (default= ``None`` => same as
      quote_char)
    - ``convert_whitespace_escapes`` - convert escaped whitespace
      (``'\t'``, ``'\n'``, etc.) to actual whitespace
      (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.search_string('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', end_quote_char='}}')
        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', esc_quote='""')
        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    ws_map = ((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r"))

    def __init__(
        self,
        quote_char: str = "",
        esc_char: typing.Optional[str] = None,
        esc_quote: typing.Optional[str] = None,
        multiline: bool = False,
        unquote_results: bool = True,
        end_quote_char: typing.Optional[str] = None,
        convert_whitespace_escapes: bool = True,
        *,
        quoteChar: str = "",
        escChar: typing.Optional[str] = None,
        escQuote: typing.Optional[str] = None,
        unquoteResults: bool = True,
        endQuoteChar: typing.Optional[str] = None,
        convertWhitespaceEscapes: bool = True,
    ):
        super().__init__()

        # merge the snake_case arguments with their camelCase synonyms
        escChar = escChar or esc_char
        escQuote = escQuote or esc_quote
        unquoteResults = unquoteResults and unquote_results
        endQuoteChar = endQuoteChar or end_quote_char
        convertWhitespaceEscapes = (
            convertWhitespaceEscapes and convert_whitespace_escapes
        )

        # remove white space from quote chars - wont work anyway
        quote_char = (quoteChar or quote_char).strip()
        if not quote_char:
            raise ValueError("quote_char cannot be the empty string")

        if endQuoteChar is None:
            endQuoteChar = quote_char
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                raise ValueError("endQuoteChar cannot be the empty string")

        self.quoteChar = quote_char
        self.quoteCharLen = len(quote_char)
        self.firstQuoteChar = quote_char[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # collect the alternatives allowed between the quotes, in priority order
        alternatives = []

        if escQuote:
            alternatives.append(r"(?:{})".format(re.escape(escQuote)))

        if escChar:
            alternatives.append(r"(?:{}.)".format(re.escape(escChar)))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"

        if len(self.endQuoteChar) > 1:
            # proper prefixes of a multi-character end quote are allowed as
            # long as they are not followed by the rest of the end quote
            prefix_alternatives = "|".join(
                "(?:{}(?!{}))".format(
                    re.escape(self.endQuoteChar[:i]),
                    re.escape(self.endQuoteChar[i:]),
                )
                for i in range(len(self.endQuoteChar) - 1, 0, -1)
            )
            alternatives.append("(?:" + prefix_alternatives + ")")

        end_first = _escape_regex_range_chars(self.endQuoteChar[0])
        esc_in_class = _escape_regex_range_chars(escChar) if escChar is not None else ""
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            alternatives.append(r"(?:[^{}{}])".format(end_first, esc_in_class))
        else:
            self.flags = 0
            alternatives.append(r"(?:[^{}\n\r{}])".format(end_first, esc_in_class))

        inner_pattern = "|".join(alternatives)
        self.pattern = (
            re.escape(self.quoteChar)
            + "(?:"
            + inner_pattern
            + ")*"
            + re.escape(self.endQuoteChar)
        )

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
            self.re_match = self.re.match
        except re.error:
            raise ValueError(
                "invalid pattern {!r} passed to Regex".format(self.pattern)
            )

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def _generateDefaultName(self):
        same_delimiters = self.quoteChar == self.endQuoteChar
        if same_delimiters and isinstance(self.quoteChar, str_type):
            return "string enclosed in {!r}".format(self.quoteChar)

        return "quoted string, starting with {} ending with {}".format(
            self.quoteChar, self.endQuoteChar
        )

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc``; return the text, unquoted and
        unescaped when ``unquoteResults`` is set."""
        # cheap first-character test before running the regex
        found = None
        if instring[loc] == self.firstQuoteChar:
            found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = found.end()
        text = found.group()

        if not self.unquoteResults:
            return loc, text

        # strip off quotes
        text = text[self.quoteCharLen : -self.endQuoteCharLen]

        if isinstance(text, str_type):
            # replace escaped whitespace
            if "\\" in text and self.convertWhitespaceEscapes:
                for literal, actual in self.ws_map:
                    text = text.replace(literal, actual)

            # replace escaped characters
            if self.escChar:
                text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

            # replace escaped quotes
            if self.escQuote:
                text = text.replace(self.escQuote, self.endQuoteChar)

        return loc, text

3239

3240

class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (whitespace is included in the match unless it is listed in the
    exclusion set - see example). Defined with a string containing all
    disallowed characters, and an optional minimum, maximum, and/or exact
    length. ``min`` defaults to 1 (a minimum value < 1 is not valid);
    ``max`` and ``exact`` default to 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimited_list(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        # whitespace may be part of the match, so it must not be skipped
        self.skipWhitespace = False
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both limits
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self):
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return "!W:({})".format(self.notChars)
        return "!W:({}...)".format(self.notChars[: 16 - 3])

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until an excluded character, the
        maximum length or the end of the input is reached."""
        forbidden = self.notCharsSet
        if instring[loc] in forbidden:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] not in forbidden:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3319

3320

class White(Token):
    """Special matching class for matching whitespace. Normally,
    whitespace is ignored by pyparsing grammars. This class is included
    when some whitespace structures are significant. Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``. Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display names used when generating the default name of the element
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws

        # skip only the known whitespace characters this element does not match
        skippable = "".join(c for c in self.whiteStrs if c not in self.matchWhite)
        self.set_whitespace_chars(skippable, copy_defaults=True)
        # self.leave_whitespace()
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both limits
        if exact > 0:
            self.minLen = self.maxLen = exact

    def _generateDefaultName(self):
        return "".join(White.whiteStrs[c] for c in self.matchWhite)

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters starting at ``loc``."""
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

3396

3397

class PositionToken(Token):
    """Base class for the position-testing tokens defined below; instances are
    flagged as possibly matching empty and as not raising index errors.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True

3403

3404

class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column
        (or a non-space character, or the end of input) is reached."""
        if col(loc, instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (
            loc < end
            and instring[loc].isspace()
            and col(loc, instring) != self.col
        ):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column; fail when
        already past that column."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]

3434

3435

class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before newline is removed
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        # helper element that skips whitespace other than newlines
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Advance past skippable whitespace; when newline was originally a
        whitespace character, also step over newlines."""
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)
        if "\n" in self.orig_whiteChars:
            while instring[pos : pos + 1] == "\n":
                pos = self.skipper.preParse(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3481

3482

class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # newline is significant here, so it must not be skipped as whitespace
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a newline at ``loc``, or match with no tokens exactly at
        the end of the input."""
        size = len(instring)
        if loc < size and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == size:
            return loc + 1, []
        # a non-newline character, or a position beyond the end of input
        raise ParseException(instring, loc, self.errmsg, self)

3504

3505

class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # a non-zero location still counts as the start when everything
        # before it is skipped by pre-parsing from position 0
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3521

3522

class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        size = len(instring)
        if loc < size:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == size:
            # exactly at the end: step one past it
            return loc + 1, []
        # already beyond the end of the input
        return loc, []

3541

3542

class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase synonym wins unless it was left at its default
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        if loc == 0:
            return loc, []

        word_chars = self.wordChars
        preceded_by_word = instring[loc - 1] in word_chars
        if preceded_by_word or instring[loc] not in word_chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3567

3568

class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # the camelCase synonym wins unless it was left at its default
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        # whitespace must not be skipped before testing the position
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (with no tokens) at or past the end of the input, or when
        the previous character is a word character and the current one is
        not; otherwise raise ``ParseException``."""
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # at position 0 there is no preceding character; without this
                # guard ``instring[loc - 1]`` would wrap around to the last
                # character of the input
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3594

3595

3596class ParseExpression(ParserElement):

3597 """Abstract subclass of ParserElement, for combining and

3598 post-processing parsed tokens.

3599 """

3600

3601 def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):

3602 super().__init__(savelist)

3603 self.exprs: List[ParserElement]

3604 if isinstance(exprs, _generatorType):

3605 exprs = list(exprs)

3606

3607 if isinstance(exprs, str_type):

3608 self.exprs = [self._literalStringClass(exprs)]

3609 elif isinstance(exprs, ParserElement):

3610 self.exprs = [exprs]

3611 elif isinstance(exprs, Iterable):

3612 exprs = list(exprs)

3613 # if sequence of strings provided, wrap with Literal

3614 if any(isinstance(expr, str_type) for expr in exprs):

3615 exprs = (

3616 self._literalStringClass(e) if isinstance(e, str_type) else e

3617 for e in exprs

3618 )

3619 self.exprs = list(exprs)

3620 else:

3621 try:

3622 self.exprs = list(exprs)

3623 except TypeError:

3624 self.exprs = [exprs]

3625 self.callPreparse = False

3626

3627 def recurse(self) -> Sequence[ParserElement]:

3628 return self.exprs[:]

3629

3630 def append(self, other) -> ParserElement:

3631 self.exprs.append(other)

3632 self._defaultName = None

3633 return self

3634

3635 def leave_whitespace(self, recursive: bool = True) -> ParserElement:

3636 """

3637 Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on

3638 all contained expressions.

3639 """

3640 super().leave_whitespace(recursive)

3641

3642 if recursive:

3643 self.exprs = [e.copy() for e in self.exprs]

3644 for e in self.exprs:

3645 e.leave_whitespace(recursive)

3646 return self

3647

3648 def ignore_whitespace(self, recursive: bool = True) -> ParserElement:

3649 """

3650 Extends ``ignore_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on

3651 all contained expressions.

3652 """

3653 super().ignore_whitespace(recursive)

3654 if recursive:

3655 self.exprs = [e.copy() for e in self.exprs]

3656 for e in self.exprs:

3657 e.ignore_whitespace(recursive)

3658 return self

3659

3660 def ignore(self, other) -> ParserElement:

3661 if isinstance(other, Suppress):

3662 if other not in self.ignoreExprs:

3663 super().ignore(other)

3664 for e in self.exprs:

3665 e.ignore(self.ignoreExprs[-1])

3666 else:

3667 super().ignore(other)

3668 for e in self.exprs:

3669 e.ignore(self.ignoreExprs[-1])

3670 return self

3671

3672 def _generateDefaultName(self):

3673 return "{}:({})".format(self.__class__.__name__, str(self.exprs))

3674

3675 def streamline(self) -> ParserElement:

3676 if self.streamlined:

3677 return self

3678

3679 super().streamline()

3680

3681 for e in self.exprs:

3682 e.streamline()

3683

3684 # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``

3685 # but only if there are no parse actions or resultsNames on the nested And's

3686 # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)

3687 if len(self.exprs) == 2:

3688 other = self.exprs[0]

3689 if (

3690 isinstance(other, self.__class__)

3691 and not other.parseAction

3692 and other.resultsName is None

3693 and not other.debug

3694 ):

3695 self.exprs = other.exprs[:] + [self.exprs[1]]

3696 self._defaultName = None

3697 self.mayReturnEmpty |= other.mayReturnEmpty

3698 self.mayIndexError |= other.mayIndexError

3699

3700 other = self.exprs[-1]

3701 if (

3702 isinstance(other, self.__class__)

3703 and not other.parseAction

3704 and other.resultsName is None

3705 and not other.debug

3706 ):

3707 self.exprs = self.exprs[:-1] + other.exprs[:]

3708 self._defaultName = None

3709 self.mayReturnEmpty |= other.mayReturnEmpty

3710 self.mayIndexError |= other.mayIndexError

3711

3712 self.errmsg = "Expected " + str(self)

3713

3714 return self

3715

3716 def validate(self, validateTrace=None) -> None:

3717 tmp = (validateTrace if validateTrace is not None else [])[:] + [self]

3718 for e in self.exprs:

3719 e.validate(tmp)

3720 self._checkRecursion([])

3721

3722 def copy(self) -> ParserElement:

3723 ret = super().copy()

3724 ret.exprs = [e.copy() for e in self.exprs]

3725 return ret

3726

def _setResultsName(self, name, listAllMatches=False):
    """
    Assign a results name, first warning (when the
    ``warn_ungrouped_named_tokens_in_collection`` diagnostic is enabled and
    not suppressed) about each contained expression that already carries a
    results name of its own.
    """
    if __diag__.warn_ungrouped_named_tokens_in_collection:
        diag_flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if diag_flag not in self.suppress_warnings_:
            for sub_expr in self.exprs:
                if not isinstance(sub_expr, ParserElement):
                    continue
                if not sub_expr.resultsName:
                    continue
                if diag_flag in sub_expr.suppress_warnings_:
                    continue
                message = (
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression"
                ).format(
                    "warn_ungrouped_named_tokens_in_collection",
                    name,
                    type(self).__name__,
                    sub_expr.resultsName,
                )
                warnings.warn(message, stacklevel=3)

    return super()._setResultsName(name, listAllMatches)

3752

# camelCase names bound to the same functions as the snake_case names
ignoreWhitespace = ignore_whitespace
leaveWhitespace = leave_whitespace

3755

3756

class And(ParseExpression):
    """
    Sequence expression: every contained :class:`ParseExpression` must be
    found, in the order given. Expressions may be separated by whitespace.

    May be constructed using the ``'+'`` operator, or using the ``'-'``
    operator, which will suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = Word(alphas)[1, ...]

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # marker element: parseImpl converts failures of any expression
        # after it into ParseSyntaxException
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self):
            return "-"

    def __init__(
        self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True
    ):
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            # swap each ``...`` placeholder for a SkipTo aimed at the
            # expression that follows it
            last_index = len(exprs) - 1
            expanded = []
            for index, item in enumerate(exprs):
                if item is not Ellipsis:
                    expanded.append(item)
                    continue
                if index >= last_index:
                    raise Exception(
                        "cannot construct And with sequence ending in ..."
                    )
                skipto_arg: ParserElement = (Empty() + exprs[index + 1]).exprs[-1]
                expanded.append(SkipTo(skipto_arg)("_skipped*"))
            exprs[:] = expanded
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            lead = self.exprs[0]
            if isinstance(lead, White):
                self.skipWhitespace = False
            else:
                # take whitespace handling from the leading expression
                self.set_whitespace_chars(
                    lead.whiteChars,
                    copy_defaults=lead.copyDefaultWhiteChars,
                )
                self.skipWhitespace = lead.skipWhitespace
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        def ends_in_pending_skip(candidate):
            return (
                isinstance(candidate, ParseExpression)
                and candidate.exprs
                and isinstance(candidate.exprs[-1], _PendingSkip)
            )

        # fold the expression following a trailing _PendingSkip into it
        if self.exprs and any(ends_in_pending_skip(e) for e in self.exprs[:-1]):
            for index, candidate in enumerate(self.exprs[:-1]):
                if candidate is None:
                    continue
                if ends_in_pending_skip(candidate):
                    candidate.exprs[-1] = candidate.exprs[-1] + self.exprs[index + 1]
                    self.exprs[index + 1] = None
            self.exprs = [e for e in self.exprs if e is not None]

        super().streamline()

        # attach each IndentedBlock to the expression that precedes it
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # walk cur and its first nested expressions looking for an
            # IndentedBlock; ``visited`` guards against recursive grammars
            visited = set()
            while cur:
                marker = id(cur)
                if marker in visited:
                    break
                visited.add(marker)
                if isinstance(cur, IndentedBlock):
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                cur = next(iter(cur.recurse()), None)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        # the first element is parsed with callPreParse=False because the
        # And's own pre-parse has already been applied to the input
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        past_error_stop = False
        for expr in self.exprs[1:]:
            if type(expr) is And._ErrorStop:
                past_error_stop = True
                continue
            if not past_error_stop:
                loc, exprtokens = expr._parse(instring, loc, doActions)
            else:
                # after an error stop, failures are raised as
                # ParseSyntaxException
                try:
                    loc, exprtokens = expr._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        path = parseElementList[:] + [self]
        for expr in self.exprs:
            expr._checkRecursion(path)
            # stop at the first element whose mayReturnEmpty is false
            if not expr.mayReturnEmpty:
                break

    def _generateDefaultName(self):
        inner = " ".join(str(e) for e in self.exprs)
        # peel away redundant enclosing {}'s
        while len(inner) > 1 and inner.startswith("{") and inner.endswith("}"):
            inner = inner[1:-1]
        return "{" + inner + "}"

3909

3910

class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. When
    more than one alternative matches, the alternative matching the longest
    string is used. May be constructed using the ``'^'`` operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            return self
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        best_err_loc = -1
        best_err = None
        candidates = []
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)

        # first pass: try every alternative and record where each one ends
        for alt in self.exprs:
            try:
                end_loc = alt.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parserElement = alt
                fatals.append(pfe)
                best_err = None
                best_err_loc = -1
            except ParseException as err:
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > best_err_loc:
                        best_err = err
                        best_err_loc = err.loc
            except IndexError:
                if len(instring) > best_err_loc:
                    best_err = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    best_err_loc = len(instring)
            else:
                candidates.append((end_loc, alt))

        if candidates:
            # second pass: re-run the candidates from longest to shortest,
            # since attached actions might change whether or how much of
            # the input they match
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no conditions or parse actions will run, so the first
                # (longest) candidate is the best match
                return candidates[0][1]._parse(instring, loc, doActions)

            longest = -1, None
            for expected_end, alt in candidates:
                if expected_end <= longest[0]:
                    # a longer match is already in hand
                    return longest

                try:
                    actual_end, toks = alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > best_err_loc:
                        best_err = err
                        best_err_loc = err.loc
                else:
                    if actual_end >= expected_end:
                        return actual_end, toks
                    # matched less than the first pass did
                    if actual_end > longest[0]:
                        longest = actual_end, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement))))
            raise fatals[0]

        if best_err is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )
        best_err.msg = self.errmsg
        raise best_err

    def __ixor__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        return self.append(other)  # Or([self, other])

    def _generateDefaultName(self):
        return "{" + " ^ ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        if __diag__.warn_multiple_tokens_in_named_alternation:
            diag_flag = Diagnostics.warn_multiple_tokens_in_named_alternation
            if diag_flag not in self.suppress_warnings_ and any(
                isinstance(alt, And) and diag_flag not in alt.suppress_warnings_
                for alt in self.exprs
            ):
                message = (
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group"
                ).format(
                    "warn_multiple_tokens_in_named_alternation",
                    name,
                    type(self).__name__,
                )
                warnings.warn(message, stacklevel=3)

        return super()._setResultsName(name, listAllMatches)

4064

4065

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. When
    more than one expression matches, the first one listed is the one that
    will match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self

        super().streamline()
        if not self.exprs:
            self.saveAsList = False
            self.mayReturnEmpty = True
            return self
        self.saveAsList = any(e.saveAsList for e in self.exprs)
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = all(
            e.skipWhitespace and not isinstance(e, White) for e in self.exprs
        )
        return self

    def parseImpl(self, instring, loc, doActions=True):
        best_err_loc = -1
        best_err = None

        # return the first alternative that parses; otherwise keep the
        # failure that occurred at the greatest location
        for alt in self.exprs:
            try:
                return alt._parse(
                    instring,
                    loc,
                    doActions,
                )
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parserElement = alt
                raise
            except ParseException as err:
                if err.loc > best_err_loc:
                    best_err = err
                    best_err_loc = err.loc
            except IndexError:
                if len(instring) > best_err_loc:
                    best_err = ParseException(
                        instring, len(instring), alt.errmsg, self
                    )
                    best_err_loc = len(instring)

        if best_err is None:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )
        best_err.msg = self.errmsg
        raise best_err

    def __ior__(self, other):
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        return self.append(other)  # MatchFirst([self, other])

    def _generateDefaultName(self):
        return "{" + " | ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        if __diag__.warn_multiple_tokens_in_named_alternation:
            diag_flag = Diagnostics.warn_multiple_tokens_in_named_alternation
            if diag_flag not in self.suppress_warnings_ and any(
                isinstance(alt, And) and diag_flag not in alt.suppress_warnings_
                for alt in self.exprs
            ):
                message = (
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group"
                ).format(
                    "warn_multiple_tokens_in_named_alternation",
                    name,
                    type(self).__name__,
                )
                warnings.warn(message, stacklevel=3)

        return super()._setResultsName(name, listAllMatches)

4175

4176

class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True):
        super().__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self) -> ParserElement:
        super().streamline()
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        if self.initExprGroups:
            # one-time classification of the contained expressions
            self.opt1map = {
                id(e.expr): e for e in self.exprs if isinstance(e, Opt)
            }
            wrapped_optionals = [e.expr for e in self.exprs if isinstance(e, Opt)]
            empty_capable = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = wrapped_optionals + empty_capable
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        multis = self.multioptionals[:]
        matchOrder = []

        failed = []
        fatals = []
        # keep sweeping over the outstanding expressions until a complete
        # sweep matches nothing
        while True:
            sweep = pending_required + pending_optional + multis
            failed.clear()
            fatals.clear()
            for expr in sweep:
                try:
                    scan_loc = expr.try_parse(instring, scan_loc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parserElement = expr
                    fatals.append(pfe)
                    failed.append(expr)
                except ParseException:
                    failed.append(expr)
                else:
                    matchOrder.append(self.opt1map.get(id(expr), expr))
                    if expr in pending_required:
                        pending_required.remove(expr)
                    elif expr in pending_optional:
                        pending_optional.remove(expr)
            if len(failed) == len(sweep):
                break

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement))))
            raise fatals[0]

        if pending_required:
            missing = ", ".join([str(e) for e in pending_required])
            raise ParseException(
                instring,
                loc,
                "Missing one or more required elements ({})".format(missing),
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [
            e for e in self.exprs if isinstance(e, Opt) and e.expr in pending_optional
        ]

        # re-parse in the discovered order to collect the actual results
        total_results = ParseResults([])
        for expr in matchOrder:
            loc, results = expr._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self):
        return "{" + " & ".join(str(e) for e in self.exprs) + "}"

4342

4343

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.

    Wraps a single contained expression, stored as ``self.expr``. ``expr``
    may be ``None``; every method here tolerates that case.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        super().__init__(savelist)
        if isinstance(expr, str_type):
            # promote a bare string to a parser element
            if issubclass(self._literalStringClass, Token):
                expr = self._literalStringClass(expr)
            elif issubclass(type(self), self._literalStringClass):
                expr = Literal(expr)
            else:
                expr = self._literalStringClass(Literal(expr))
        self.expr = expr
        if expr is not None:
            # copy the wrapped expression's parsing attributes onto self
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.set_whitespace_chars(
                expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
            )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self) -> Sequence[ParserElement]:
        """Return the contained expression as a one-item list, or ``[]`` if unset."""
        return [self.expr] if self.expr is not None else []

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate parsing to the contained expression.

        Raises :class:`ParseException` when no expression is defined.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException(instring, loc, "No expression defined", self)

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``leave_whitespace`` to self and, when ``recursive``, to a
        copy of the contained expression that replaces ``self.expr``.
        """
        super().leave_whitespace(recursive)

        # check for None *before* copying: the original copied first and so
        # raised AttributeError when no expression had been assigned
        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        """Apply ``ignore_whitespace`` to self and, when ``recursive``, to a
        copy of the contained expression that replaces ``self.expr``.
        """
        super().ignore_whitespace(recursive)

        # same None-before-copy guard as leave_whitespace
        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other) -> ParserElement:
        """Register ``other`` as an ignorable expression on self and pass the
        newly registered entry on to the contained expression.

        A :class:`Suppress` already present in ``ignoreExprs`` is not added
        a second time.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super().ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self) -> ParserElement:
        """Streamline self, then the contained expression if there is one."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise :class:`RecursiveGrammarException` if self already appears in
        ``parseElementList``; otherwise continue the check in the contained
        expression with self appended to a copy of the list.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None) -> None:
        """Validate the contained expression, then check self for recursion.

        ``validateTrace`` is copied, never modified in place.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self):
        return "{}:({})".format(self.__class__.__name__, str(self.expr))

    # camelCase names bound to the same functions as the snake_case names
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace

4435

4436

class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        # matches only when the current column equals ``ref_col``
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        # matches only when the current column is greater than ``ref_col``
        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = f"expected indent at column greater than {ref_col}"
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        # use an Empty() to move past leading whitespace; the resulting
        # position gives the column used for all following indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # if self.expr cannot match here this raises, and nothing more
        # needs to be done
        self.expr.try_parse(instring, anchor_loc, doActions)

        indent_col = col(anchor_loc, instring)
        same_level = self._Indent(indent_col)

        inner_expr = Empty() + same_level + self.expr
        if self._recursive:
            deeper_level = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(deeper_level + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        body = Group(block) if self._grouped else block
        return (body + Optional(trailing_undent)).parseImpl(
            instring, anchor_loc, doActions
        )

4499

4500

class AtStringStart(ParseElementEnhance):
    """Matches only when the given expression matches at the very beginning
    of the parse string::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        # any location other than 0 is a failure
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")

4520

4521

class AtLineStart(ParseElementEnhance):
    r"""Matches only when the given expression matches at the beginning of
    a line within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + restOfLine).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        # column 1 is the start of a line
        if col(loc, instring) == 1:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")

4553

4554

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # parse with the wrapped expression, then empty the token list in
        # place so that any named results defined in the lookahead
        # expression are kept
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead[:]

        # hand back the incoming location: nothing is consumed
        return loc, lookahead

4589

4590

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - expr - expression that must match prior to the current parse
      location
    - retreat - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    If the lookbehind expression is a string, :class:`Literal`,
    :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn`
    with a specified exact or maximum length, then the retreat
    parameter is not required. Otherwise, retreat must be specified to
    give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
    ):
        super().__init__(expr)
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.exact = False
        # for these expression kinds the lookbehind distance is taken from
        # the expression itself and overrides any ``retreat`` argument
        width_known = True
        if isinstance(expr, str_type):
            retreat = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
        elif isinstance(expr, PositionToken):
            retreat = 0
        else:
            width_known = False
        if width_known:
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # parse action that empties the matched token list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        if not self.exact:
            # ``retreat`` is a maximum lookbehind window: try successively
            # longer tails of the window until one parses through to its end
            anchored = self.expr + StringEnd()
            window = instring[max(0, loc - self.retreat) : loc]
            failure = ParseException(instring, loc, self.errmsg)
            for offset in range(1, min(loc, self.retreat + 1) + 1):
                try:
                    _, ret = anchored._parse(window, len(window) - offset)
                except ParseBaseException as pbe:
                    failure = pbe
                else:
                    break
            else:
                raise failure
        else:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            _, ret = self.expr._parse(instring, loc - self.retreat)
        return loc, ret

4670

4671

class Located(ParseElementEnhance):
    """
    Decorates a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        begin = loc
        end, tokens = self.expr._parse(instring, begin, doActions, callPreParse=False)
        located = ParseResults([begin, tokens, end])
        for key, value in (
            ("locn_start", begin),
            ("value", tokens),
            ("locn_end", end),
        ):
            located[key] = value
        if not self.resultsName:
            return end, located
        # wrap in a list so that the name is attached to the complete group
        return end, [located]

4712

4713

class NotAny(ParseElementEnhance):
    """
    Negative lookahead: succeeds only when the given parse expression
    does *not* match at the current position.

    ``NotAny`` does *not* advance the parsing position within the
    input string, and it does *not* skip over leading whitespace.
    ``NotAny`` always returns a null token list. May be constructed
    using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...]

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # Set the flag directly instead of calling self.leave_whitespace():
        # that call would propagate the setting into the contained exprs.
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc, [])`` unless the wrapped expression matches at ``loc``."""
        if not self.expr.can_parse_next(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self):
        return "".join(("~{", str(self.expr), "}"))

4755

4756

class _MultipleMatch(ParseElementEnhance):
    """Common base for repetition expressions: matches the wrapped
    expression one or more times, optionally stopping before a sentinel
    expression given as ``stop_on`` (or the legacy keyword ``stopOn``).
    """

    def __init__(
        self,
        expr: ParserElement,
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(expr)
        # the legacy camelCase keyword wins when both are supplied
        sentinel = stopOn or stop_on
        self.saveAsList = True
        if isinstance(sentinel, str_type):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender) -> ParserElement:
        """Set (or clear, with ``None``) the terminating sentinel; returns self."""
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        # stored negated, so that "sentinel found" surfaces as a parse failure
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the expression once (required), then greedily as many more
        times as possible, accumulating all tokens."""
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        must_check_ender = self.not_ender is not None
        if must_check_ender:
            reject_if_ender = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if must_check_ender:
            reject_if_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if must_check_ender:
                    reject_if_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, preloc, doActions)
                if more_tokens or more_tokens.haskeys():
                    tokens += more_tokens
        except (ParseException, IndexError):
            # a failed repetition (or a hit on the sentinel) ends the loop;
            # everything matched so far is kept
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Attach a results name, warning first (when that diagnostic is
        enabled) if a contained expression already carries a results name."""
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for contained in [self.expr] + self.expr.recurse():
                if not isinstance(contained, ParserElement):
                    continue
                if not contained.resultsName:
                    continue
                if (
                    Diagnostics.warn_ungrouped_named_tokens_in_collection
                    in contained.suppress_warnings_
                ):
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        contained.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

4833

4834

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        attr_expr[1, ...].parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint()  # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self):
        # rendered as "{expr}..."
        return "".join(("{", str(self.expr), "}..."))

4864

4865

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: ParserElement,
        stop_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        stopOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the one-or-more logic; if even the first match fails,
        succeed with an empty result at the unchanged location."""
        try:
            outcome = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self):
        # rendered as "[expr]..."
        return "".join(("[", str(self.expr), "]..."))

4897

4898

class _NullToken:
    """Placeholder object that is falsy and renders as the empty string."""

    def __bool__(self):
        # always evaluates as False in a boolean context
        return False

    def __str__(self):
        # contributes nothing when converted to text
        return ""

4905

4906

class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
    - ``expr`` - expression that may match zero or one time
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # private sentinel meaning "no default supplied"; this lets ``None`` and
    # other falsy values be used as real defaults
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression; on failure, succeed anyway with the
        default value (if one was given) or with no tokens."""
        inner = self.expr
        try:
            loc, tokens = inner._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif inner.resultsName:
                # also expose the default under the inner expression's name
                tokens = ParseResults([fallback])
                tokens[inner.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self):
        text = str(self.expr)
        # strip off redundant inner {}'s
        while len(text) > 1 and text[0] == "{" and text[-1] == "}":
            text = text[1:-1]
        return "".join(("[", text, "]"))


Optional = Opt

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression;
      a plain string is converted to a literal expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: '6'
        - desc: 'Intermittent system crash'
        - issue_num: '101'
        - sev: 'Critical'
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: '14'
        - desc: "Spelling error on Login ('log|n')"
        - issue_num: '94'
        - sev: 'Cosmetic'
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: '47'
        - desc: 'System slow when running too many reports'
        - issue_num: '79'
        - sev: 'Minor'
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: typing.Optional[Union[ParserElement, str]] = None,
        fail_on: typing.Optional[Union[ParserElement, str]] = None,
        *,
        failOn: typing.Optional[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        failOn = failOn or fail_on
        # Accept a plain string for ``ignore`` the same way ``fail_on`` does;
        # previously a string was stored as-is and failed at parse time with
        # AttributeError when its ``tryParse`` was looked up.
        if isinstance(ignore, str_type):
            ignore = self._literalStringClass(ignore)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` until the target expression matches.

        Returns the location of the target (or the location after it when
        ``include`` was requested) and the skipped text as results.
        Raises ``ParseException`` if the end of the input is reached
        without finding the target.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        self_ignoreExpr_tryParse = (
            self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
        )

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while True:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are deliberately not run here
                self_expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real, so that its parse actions run
            loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

5115

5116

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None):
        # remember where this Forward was created, for the warning in __del__
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)
        self.lshift_line = None

    def __lshift__(self, other):
        """Attach ``other`` as the contained expression and copy its parsing
        attributes onto this element; returns self."""
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        self.expr = other
        attached = self.expr
        self.mayIndexError = attached.mayIndexError
        self.mayReturnEmpty = attached.mayReturnEmpty
        self.set_whitespace_chars(
            attached.whiteChars, copy_defaults=attached.copyDefaultWhiteChars
        )
        self.skipWhitespace = attached.skipWhitespace
        self.saveAsList = attached.saveAsList
        self.ignoreExprs.extend(attached.ignoreExprs)
        # recorded so that __or__ can detect "fwd << a | b" on one source line
        self.lshift_line = traceback.extract_stack(limit=2)[-2]
        return self

    def __ilshift__(self, other):
        return self << other

    def __or__(self, other):
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        return super().__or__(other)

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            warnings.warn_explicit(
                "Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, doActions=True):
        """Parse using the attached expression, with optional handling of
        left recursion through the shared recursion memo table."""
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = [
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            ]
            stack = traceback.extract_stack(limit=200)
            for depth, frame in enumerate(reversed(stack), start=1):
                if frame.name in parse_fns:
                    stacklevel = depth + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, doActions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `doActions=False` and only run `doActions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, doActions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `doActions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if doActions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match – do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if doActions:
                            # replace the match for doActions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, prev_result.copy()
                        del memo[peek_key]
                        return prev_loc, prev_peek.copy()
                    # the match did get better: see if we can improve further
                    else:
                        if doActions:
                            try:
                                memo[act_key] = super().parseImpl(instring, loc, True)
                            except ParseException as e:
                                memo[peek_key] = memo[act_key] = (new_loc, e)
                                raise
                        prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
        # only this element's own flag is changed; ``recursive`` is not used
        self.skipWhitespace = True
        return self

    def streamline(self) -> ParserElement:
        if self.streamlined:
            return self
        # flag is set before descending into the contained expression
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=None) -> None:
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            extended_trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(extended_trace)
        self._checkRecursion([])

    def _generateDefaultName(self):
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        retString = "..."
        try:
            if self.expr is not None:
                retString = str(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            # returning from ``finally`` means a name is produced even if
            # building the inner string raised
            return self.__class__.__name__ + ": " + retString

    def copy(self) -> ParserElement:
        if self.expr is None:
            # no expression attached yet: return a new Forward that refers
            # back to this one
            duplicate = Forward()
            duplicate <<= self
            return duplicate
        return super().copy()

    def _setResultsName(self, name, list_all_matches=False):
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward
            not in self.suppress_warnings_
        ):
            if self.expr is None:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "that has no contained expression".format(
                        "warn_name_set_on_empty_Forward", name, type(self).__name__
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, list_all_matches)

    # camelCase aliases of the methods above
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace

5361

5362

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted but not forwarded to the base class
        super().__init__(expr)
        self.saveAsList = False

5371

5372

class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: typing.Optional[str] = None,
    ):
        super().__init__(expr)
        # the legacy camelCase keyword takes precedence when it is given
        if joinString is None:
            joinString = join_string
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other) -> ParserElement:
        if not self.adjacent:
            super().ignore(other)
        else:
            # call the ParserElement implementation directly, bypassing the
            # overrides in the intermediate base classes
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one concatenated string token."""
        # start from a copy of the results, then swap its list contents
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

5428

5429

class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(delimited_list(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(delimited_list(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens one level deeper."""
        if not self._asPythonList:
            return [tokenlist]
        if isinstance(tokenlist, ParseResults):
            plain_items = tokenlist.asList()
        else:
            plain_items = list(tokenlist)
        return ParseResults.List(plain_items)

5465

5466

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(attr_expr[1, ...].parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """Register each sub-group of ``tokenlist`` under its first token."""
        for position, tok in enumerate(tokenlist):
            size = len(tok)
            if size == 0:
                continue

            key = tok[0]
            if isinstance(key, int):
                # integer keys are stored under their string form
                key = str(key).strip()

            if size == 1:
                # key with no value
                tokenlist[key] = _ParseResultsWithOffset("", position)
                continue

            if size == 2 and not isinstance(tok[1], ParseResults):
                # key with a single plain value
                tokenlist[key] = _ParseResultsWithOffset(tok[1], position)
                continue

            try:
                remainder = tok.copy()
            except Exception:
                raise TypeError(
                    "could not extract dict values from parsed results"
                    " - Dict expression must contain Grouped expressions"
                ) from None

            del remainder[0]

            keeps_structure = len(remainder) != 1 or (
                isinstance(remainder, ParseResults) and remainder.haskeys()
            )
            value = remainder if keeps_structure else remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result

5552

5553

class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + (',' + wd)[...]
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + (Suppress(',') + wd)[...]
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`delimited_list`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` is a placeholder for "skip to whatever is added next"
        if expr is ...:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        # resolve the pending skip now that the following expression is known
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other) -> "ParserElement":
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        # resolve the pending skip now that the following expression is known
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        # drop every matched token
        return []

    def suppress(self) -> ParserElement:
        # already suppressed
        return self

5607

5608

def trace_parse_action(f: ParseAction) -> ParseAction:
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator will print
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
    When the parse action completes, the decorator will print
    ``"<<"`` followed by the returned value, or any exception that the parse action raised.
    All trace output is written to ``sys.stderr``.

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = wd[1, ...].set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        text, position, toks = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is the bound instance; qualify the name
            label = paArgs[0].__class__.__name__ + "." + label
        emit = sys.stderr.write
        emit(
            ">>entering {}(line: {!r}, {}, {!r})\n".format(
                label, line(position, text), position, toks
            )
        )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            emit("<<leaving {} (exception: {})\n".format(label, exc))
            raise
        emit("<<leaving {} (ret: {!r})\n".format(label, ret))
        return ret

    z.__name__ = f.__name__
    return z

5654

5655

# Ready-made module-level instances of the positional expression classes,
# each given the same name as the variable it is bound to.
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")

5662

# Mini-grammar used by srange() to parse a regex-style "[...]" character set.

# Backslash followed by one of the listed punctuation characters; the parse
# action keeps only the character after the backslash.
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).set_parse_action(
    lambda s, l, t: t[0][1]
)
# Hex escape: backslash, optional "0", "x" or "X", then hex digits.
# The digits are taken as everything after the x/X marker.  str.lstrip(r"\0x")
# must not be used for this: its argument is a *set* of characters, so it also
# strips leading "0" digits (r"\x00" would become "") and never strips an
# upper-case "X" -- both make int() raise ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# Octal escape: backslash, "0", then octal digits; everything after the
# backslash is converted with base 8.
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# One set member: any of the escapes above, or a single character that is
# neither a backslash nor "]".
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range, grouped so srange() can tell it from a lone character.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# Whole bracket expression: "[", optional "^" (results name "negate"),
# one or more ranges/characters (results name "body"), then "]".
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + "]"
)

5682

5683

def srange(s: str) -> str:
    r"""Expand a regex-style ``'[]'`` character set into a plain string,
    convenient for building :class:`Word` expressions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The argument must be wrapped in []'s; the result is every character
    of the set joined into one string.  Inside the []'s you may write:

    - a single character
    - a backslash-escaped character (such as ``\-`` or ``\]``)
    - a hex escape introduced by ``'\x'`` (``\x21`` is ``'!'``);
      the older ``\0x##`` spelling is accepted as well
    - an octal escape introduced by ``'\0'`` (``\041`` is ``'!'``)
    - a dash-separated range of any of the above (``'a-z'``, etc.)
    - any mixture of these (``'aeiouy'``, ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, ``""`` is returned.
    """

    def expand(item):
        # plain characters pass through untouched
        if not isinstance(item, ParseResults):
            return item
        # a grouped pair holds the first and last character of a range
        low, high = ord(item[0]), ord(item[1])
        return "".join(map(chr, range(low, high + 1)))

    try:
        members = _reBracketExpr.parse_string(s).body
        return "".join(map(expand, members))
    except Exception:
        return ""

5719

5720

def token_map(func, *args) -> ParseAction:
    """Build a parse action that applies ``func`` to each parsed token.

    The returned parse action calls ``func(token, *args)`` for every
    element of the :class:`ParseResults` it receives and returns the list
    of results.  Extra ``args`` are passed after the token, as in
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``,
    which converts the parsed text to an integer using base 16.

    Example (compare the last to the example in
    :class:`ParserElement.transform_string`)::

        hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        upperword[1, ...].run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        wd[1, ...].set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """

    def mapper(instring, loc, tokens):
        converted = []
        for tok in tokens:
            converted.append(func(tok, *args))
        return converted

    # name the parse action after ``func``; objects lacking ``__name__``
    # fall back to the name of their class
    class_name = func.__class__.__name__
    mapper.__name__ = getattr(func, "__name__", class_name)

    return mapper

5765

5766

def autoname_elements() -> None:
    """
    Utility to simplify mass-naming of parser elements, for
    generating railroad diagram with named subdiagrams.

    Looks at the local variables of the *calling* frame and, for every
    value that is a :class:`ParserElement` whose ``customName`` is falsy,
    calls ``set_name`` with the variable's name.  Does nothing when there
    is no calling frame.
    """
    caller = sys._getframe().f_back
    if caller is None:
        # bottom of the stack: there are no caller locals to inspect
        return
    for name, var in caller.f_locals.items():
        if isinstance(var, ParserElement) and not var.customName:
            var.set_name(name)

5775

5776

# Quoted-string expressions.  Inside the quotes the regexes accept any
# character other than the quote, newline, carriage return or backslash;
# a doubled quote; or a backslash followed by a non-"x" character or by
# "x" and hex digits.

# "..." form
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# '...' form
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# either form, double-quoted alternative tried first
quoted_string = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"')
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'")
).set_name("quotedString using single or double quotes")

# a quoted string carrying a leading "u" prefix
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")

5791

5792

# Character sets built with srange() from hex-escaped ranges:
# 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff ...
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# ... and 0xa1-0xbf plus the two code points skipped above (0xd7, 0xf7).
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

5795

# Snapshot of every ParserElement bound at module level so far, kept for
# later reference in case a global default value gets updated.
_builtin_exprs: List[ParserElement] = list(
    filter(lambda candidate: isinstance(candidate, ParserElement), vars().values())
)

5801

# Backward-compatibility aliases: camelCase names bound to the very same
# objects as their snake_case counterparts.

# helper functions
tokenMap = token_map
conditionAsParseAction = condition_as_parse_action
nullDebugAction = null_debug_action
# quoted-string expressions
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
# positional expressions
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
# decorator
traceParseAction = trace_parse_action