# helpers.py
import html.entities
import operator
import re
import sys
import typing

from . import __diag__
from .core import *
from .util import (
    _bslash,
    _flatten,
    _escape_regex_range_chars,
    make_compressed_re,
    replaced_by_pep8,
)


#
# global helpers
#
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
28 """Helper to define a counted list of expressions.
29
30 This helper defines a pattern of the form::
31
32 integer expr expr expr...
33
34 where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as a list of ``expr`` tokens; the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Examples:

    .. doctest::

        >>> counted_array(Word(alphas)).parse_string('2 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - In this parser, the leading integer value is given in binary,
      '10' indicating that 2 values are in the array:

      .. doctest::

          >>> binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
          >>> counted_array(Word(alphas), int_expr=binary_constant
          ...     ).parse_string('10 ab cd ef')
          ParseResults(['ab', 'cd'], {})

    - If other fields must be parsed after the count but before the
      list items, give the fields results names and they will
      be preserved in the returned ParseResults:

      .. doctest::

          >>> ppc = pyparsing.common
          >>> count_with_metadata = ppc.integer + Word(alphas)("type")
          >>> typed_array = counted_array(Word(alphanums),
          ...     int_expr=count_with_metadata)("items")
          >>> result = typed_array.parse_string("3 bool True True False")
          >>> print(result.dump())
          ['True', 'True', 'False']
          - items: ['True', 'True', 'False']
          - type: 'bool'
    """
    intExpr = intExpr or int_expr
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")


def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, it will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
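
    For example (illustrative):

    .. doctest::

        >>> first = Word(nums)
        >>> match_expr = first + ":" + match_previous_literal(first)
        >>> print(match_expr.parse_string("1:10"))
        ['1', ':', '1']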
109 """
110 rep = Forward()
111
112 def copy_token_to_repeater(s, l, t):
113 if not t:
114 rep << Empty()
115 return
116
117 if len(t) == 1:
118 rep << t[0]
119 return
120
121 # flatten t tokens
122 tflat = _flatten(t.as_list())
123 rep << And(Literal(tt) for tt in tflat)
124
125 expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
126 rep.set_name("(prev) " + str(expr))
127 return rep
128
129
130def match_previous_expr(expr: ParserElement) -> ParserElement:
131 """Helper to define an expression that is indirectly defined from
132 the tokens matched in a previous expression, that is, it looks for
133 a 'repeat' of a previous expression. For example:
134
135 .. testcode::
136
137 first = Word(nums)
138 second = match_previous_expr(first)
139 match_expr = first + ":" + second
140
    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, it will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
146 """
147 rep = Forward()
148 e2 = expr.copy()
149 rep <<= e2
150
151 def copy_token_to_repeater(s, l, t):
152 matchTokens = _flatten(t.as_list())
153
154 def must_match_these_tokens(s, l, t):
155 theseTokens = _flatten(t.as_list())
156 if theseTokens != matchTokens:
157 raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep


def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
177 """Helper to quickly define a set of alternative :class:`Literal` s,
178 and makes sure to do longest-first testing when there is a conflict,
179 regardless of the input order, but returns
180 a :class:`MatchFirst` for best performance.

    :param strs: a string of space-delimited literals, or a collection of
       string literals
    :param caseless: treat all literals as caseless
    :param use_regex: bool - as an optimization, will
       generate a :class:`Regex` object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or
       ``as_keyword=True``, or if creating a :class:`Regex` raises an exception)
    :param as_keyword: bool - enforce :class:`Keyword`-style matching on the
       generated expressions

    Parameters ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
    compatibility, but will be removed in a future release.

    Example:

    .. testcode::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints:

    .. testoutput::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
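
    With ``caseless=True``, matches are returned in the casing given in
    ``strs`` (illustrative):

    .. doctest::

        >>> bool_literal = one_of("true false", caseless=True, as_keyword=True)
        >>> print(bool_literal.parse_string("TRUE"))
        ['true']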
211 """
212 asKeyword = asKeyword or as_keyword
213 useRegex = useRegex and use_regex
214
215 if (
216 isinstance(caseless, str_type)
217 and __diag__.warn_on_multiple_string_args_to_oneof
218 ):
219 warnings.warn(
220 "warn_on_multiple_string_args_to_oneof:"
221 " More than one string argument passed to one_of, pass"
222 " choices as a list or space-delimited string",
223 stacklevel=2,
224 )
225
226 if caseless:
227 is_equal = lambda a, b: a.upper() == b.upper()
228 masks = lambda a, b: b.upper().startswith(a.upper())
229 else:
230 is_equal = operator.eq
231 masks = lambda a, b: b.startswith(a)
232
233 symbols: list[str]
234 if isinstance(strs, str_type):
235 strs = typing.cast(str, strs)
236 symbols = strs.split()
237 elif isinstance(strs, Iterable):
238 symbols = list(strs)
239 else:
240 raise TypeError("Invalid argument to one_of, expected string or iterable")
241 if not symbols:
242 return NoMatch()
243
244 # reorder given symbols to take care to avoid masking longer choices with shorter ones
245 # (but only if the given symbols are not just single characters)
246 i = 0
247 while i < len(symbols) - 1:
248 cur = symbols[i]
249 for j, other in enumerate(symbols[i + 1 :]):
250 if is_equal(other, cur):
251 del symbols[i + j + 1]
252 break
253 if len(other) > len(cur) and masks(cur, other):
254 del symbols[i + j + 1]
255 symbols.insert(i, other)
256 break
257 else:
258 i += 1
259
260 if useRegex:
261 re_flags: int = re.IGNORECASE if caseless else 0
262
263 try:
264 if all(len(sym) == 1 for sym in symbols):
265 # symbols are just single characters, create range regex pattern
266 patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
267 else:
268 patt = "|".join(re.escape(sym) for sym in symbols)
269
270 # wrap with \b word break markers if defining as keywords
271 if asKeyword:
272 patt = rf"\b(?:{patt})\b"
273
274 ret = Regex(patt, flags=re_flags)
275 ret.set_name(" | ".join(repr(s) for s in symbols))
276
277 if caseless:
278 # add parse action to return symbols as specified, not in random
279 # casing as found in input string
280 symbol_map = {sym.lower(): sym for sym in symbols}
281 ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])
282
283 return ret
284
285 except re.error:
286 warnings.warn(
287 "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
288 )
289
290 # last resort, just use MatchFirst of Token class corresponding to caseless
291 # and asKeyword settings
292 CASELESS = KEYWORD = True
293 parse_element_class = {
294 (CASELESS, KEYWORD): CaselessKeyword,
295 (CASELESS, not KEYWORD): CaselessLiteral,
296 (not CASELESS, KEYWORD): Keyword,
297 (not CASELESS, not KEYWORD): Literal,
298 }[(caseless, asKeyword)]
299 return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
300 " | ".join(symbols)
301 )


def dict_of(key: ParserElement, value: ParserElement) -> Dict:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example:

    .. doctest::

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = (
        ...     label
        ...     + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join))
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        >>> attr_label = label
        >>> attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label
        ...     ).set_parse_action(' '.join)

        >>> # similar to Dict, but simpler call format
        >>> result = dict_of(attr_label, attr_value).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        >>> print(result['shape'])
        SQUARE
        >>> print(result.shape)  # object attribute access works too
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}
    """
    return Dict(OneOrMore(Group(key + value)))


def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example:

    .. testcode::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints:

    .. testoutput::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
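
    To preserve results names defined within ``expr``, pass
    ``as_string=False`` (illustrative):

    .. doctest::

        >>> tag = Literal("<") + Word(alphas)("tag_name") + ">"
        >>> result = original_text_for(tag, as_string=False).parse_string("<html>")
        >>> print(result[0], result.tag_name)
        <html> html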
398 """
399 asString = asString and as_string
400
401 locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
402 endlocMarker = locMarker.copy()
403 endlocMarker.callPreparse = False
404 matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
405 if asString:
406 extractText = lambda s, l, t: s[t._original_start : t._original_end]
407 else:
408
409 def extractText(s, l, t):
410 t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]
411
412 matchExpr.set_parse_action(extractText)
413 matchExpr.ignoreExprs = expr.ignoreExprs
414 matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
415 return matchExpr
416
417
418def ungroup(expr: ParserElement) -> ParserElement:
419 """Helper to undo pyparsing's default grouping of And expressions,
420 even if all but one are non-empty.
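
    Example (illustrative):

    .. doctest::

        >>> grouped = Group(Word(nums) + Word(alphas))
        >>> print(grouped.parse_string("42 widgets"))
        [['42', 'widgets']]
        >>> print(ungroup(grouped).parse_string("42 widgets"))
        ['42', 'widgets']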
421 """
422 return TokenConverter(expr).add_parse_action(lambda t: t[0])
423
424
425def locatedExpr(expr: ParserElement) -> ParserElement:
426 """
427 .. deprecated:: 3.0.0
428 Use the :class:`Located` class instead.
429
430 Helper to decorate a returned token with its starting and ending
431 locations in the input string.
432
433 This helper adds the following results names:
434
435 - ``locn_start`` - location where matched expression begins
436 - ``locn_end`` - location where matched expression ends
437 - ``value`` - the actual parsed results
438
    Be careful if the input text contains ``<TAB>`` characters; you
    may want to call :meth:`ParserElement.parse_with_tabs`.

    Example:

    .. testcode::

        wd = Word(alphas)
        res = locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222")
        for match in res:
            print(match)

    prints:

    .. testoutput::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
    return Group(
        locator("locn_start")
        + expr("value")
        + locator.copy().leaveWhitespace()("locn_end")
    )


# define special default value to permit None as a significant value for
# ignore_expr
_NO_IGNORE_EXPR_GIVEN = NoMatch()


def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
    *,
    ignoreExpr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    :param opener: str - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression

    :param closer: str - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression

    :param content: expression for items within the nested lists

    :param ignore_expr: expression for ignoring opening and closing delimiters
       (default = :class:`quoted_string`)

    Parameter ``ignoreExpr`` is retained for compatibility
    but will be removed in a future release.

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example:

    .. testcode::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print(f"{func.name} ({func.type}) args: {func.args}")

    prints:

    .. testoutput::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr

    if ignoreExpr is _NO_IGNORE_EXPR_GIVEN:
        ignoreExpr = quoted_string()

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    )
                else:
                    content = Combine(
                        Empty()
                        + CharsNotIn(
                            opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                        )
                    )
            else:
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )

    # for these internally-created context expressions, simulate whitespace-skipping
    if ParserElement.DEFAULT_WHITE_CHARS:
        content.set_parse_action(
            lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS)
        )

    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))

    ret.set_name(f"nested {opener}{closer} expression")

    # don't override error message from content expressions
    ret.errmsg = None
    return ret


def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name"""
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag


def make_html_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example:

    .. testcode::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints:

    .. testoutput::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    return _makeTags(tag_str, False)


def make_xml_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
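
    A short illustrative doctest:

    .. doctest::

        >>> title, title_end = make_xml_tags("title")
        >>> expr = title + SkipTo(title_end)("text") + title_end
        >>> print(expr.parse_string("<title>PyParsing</title>").text)
        PyParsing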
724 """
725 return _makeTags(tag_str, True)
726
727
728any_open_tag: ParserElement
729any_close_tag: ParserElement
730any_open_tag, any_close_tag = make_html_tags(
731 Word(alphas, alphanums + "_:").set_name("any tag")
732)
733
734_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
735_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
736 " ", "|"
737)
738common_html_entity = Regex(
739 lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
740).set_name("common HTML entity")
741
742
743def replace_html_entity(s, l, t):
744 """Helper parser action to replace common HTML entities with their special characters"""
    return _htmlEntityMap.get(t.entity)


class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    LEFT = 1
    RIGHT = 2


InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]


def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> Forward:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    :param base_expr: expression representing the most basic operand to
       be used in the expression
    :param op_list: list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(op_expr,
       num_operands, right_left_assoc, (optional)parse_action)``, where:

       - ``op_expr`` is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if ``num_operands``
         is 3, ``op_expr`` is a tuple of two expressions, for the two
         operators separating the 3 terms
       - ``num_operands`` is the number of terms for this operator (must be 1,
         2, or 3)
       - ``right_left_assoc`` is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
       - ``parse_action`` is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``set_parse_action(*fn)``
         (:class:`ParserElement.set_parse_action`)

    :param lpar: expression for matching left-parentheses; if passed as a
       str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
       an expression (such as ``Literal('(')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses; if passed as a
       str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
       an expression (such as ``Literal(')')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example:

    .. testcode::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
                ('-', 1, OpAssoc.RIGHT),
                (one_of('* /'), 2, OpAssoc.LEFT),
                (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}_expression")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    arity: int
    rightLeftAssoc: opAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    for operDef in op_list:
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        thisExpr <<= (matchExpr | lastExpr).setName(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret


def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    .. deprecated:: 3.0.0
        Use the :class:`IndentedBlock` class instead.

    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    :param blockStatementExpr: expression defining syntax of statement that
       is repeated within the indented block

    :param indentStack: list created by caller to manage indentation stack
       (multiple ``statementWithIndentedBlock`` expressions within a single
       grammar should share a common ``indentStack``)

    :param indent: boolean indicating whether block must be indented beyond
       the current level; set to ``False`` for block of left-most statements

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example:

    .. testcode::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints:

    .. testoutput::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    backup_stacks.append(indentStack[:])

    def reset_stack():
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")


# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """
    .. deprecated:: 3.1.0
        Use the :class:`DelimitedList` class instead.
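
    Example (illustrative; delimiters are suppressed by default):

    .. doctest::

        >>> delimited_list(Word(alphas)).parse_string("aa, bb, cc")
        ParseResults(['aa', 'bb', 'cc'], {})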
1170 """
1171 return DelimitedList(
1172 expr, delim, combine, min, max, allow_trailing_delim=allow_trailing_delim
1173 )
1174
1175
1176# Compatibility synonyms
1177# fmt: off
1178opAssoc = OpAssoc
1179anyOpenTag = any_open_tag
1180anyCloseTag = any_close_tag
1181commonHTMLEntity = common_html_entity
1182cStyleComment = c_style_comment
1183htmlComment = html_comment
1184restOfLine = rest_of_line
1185dblSlashComment = dbl_slash_comment
1186cppStyleComment = cpp_style_comment
1187javaStyleComment = java_style_comment
1188pythonStyleComment = python_style_comment
1189delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
1190delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
1191countedArray = replaced_by_pep8("countedArray", counted_array)
1192matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
1193matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
1194oneOf = replaced_by_pep8("oneOf", one_of)
1195dictOf = replaced_by_pep8("dictOf", dict_of)
1196originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
1197nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
1198makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
1199makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
1200replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
1201infixNotation = replaced_by_pep8("infixNotation", infix_notation)
1202# fmt: on