Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/helpers.py: 26%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# helpers.py
2import html.entities
3import operator
4import re
5import sys
6import typing
8from . import __diag__
9from .core import *
10from .util import (
11 _bslash,
12 _flatten,
13 _escape_regex_range_chars,
14 make_compressed_re,
15 replaced_by_pep8,
16)
def _suppression(expr: Union[ParserElement, str]) -> ParserElement:
    """Internal helper: wrap *expr* in a :class:`Suppress`, unless it
    already is one (avoids double-wrapping)."""
    return expr if isinstance(expr, Suppress) else Suppress(expr)
26#
27# global helpers
28#
def counted_array(
    expr: ParserElement, int_expr: typing.Optional[ParserElement] = None, **kwargs
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Examples:

    .. doctest::

       >>> counted_array(Word(alphas)).parse_string('2 ab cd ef')
       ParseResults(['ab', 'cd'], {})

    - In this parser, the leading integer value is given in binary,
      '10' indicating that 2 values are in the array:

      .. doctest::

         >>> binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
         >>> counted_array(Word(alphas), int_expr=binary_constant
         ... ).parse_string('10 ab cd ef')
         ParseResults(['ab', 'cd'], {})

    - If other fields must be parsed after the count but before the
      list items, give the fields results names and they will
      be preserved in the returned ParseResults:

      .. doctest::

         >>> ppc = pyparsing.common
         >>> count_with_metadata = ppc.integer + Word(alphas)("type")
         >>> typed_array = counted_array(Word(alphanums),
         ...     int_expr=count_with_metadata)("items")
         >>> result = typed_array.parse_string("3 bool True True False")
         >>> print(result.dump())
         ['True', 'True', 'False']
         - items: ['True', 'True', 'False']
         - type: 'bool'
    """
    # honor the deprecated camelCase keyword, if the caller passed one
    intExpr: typing.Optional[ParserElement] = deprecate_argument(
        kwargs, "intExpr", None
    )
    intExpr = intExpr or int_expr

    # the array body cannot be defined until the count is parsed, so it is a
    # Forward that gets (re)defined by the count's parse action at parse time
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # define the array body as exactly n copies of expr (Empty for n == 0)
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count expression: a decimal integer
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so that the caller's expression is not modified below
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True ensures the Forward gets defined even during
    # lookahead/try parsing, not only on a final successful parse
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

    .. testcode::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    repeater = Forward()

    def define_repeater(s, l, t):
        # redefine the repeater from whatever the source expression just matched
        if not t:
            repeater << Empty()
        elif len(t) == 1:
            repeater << t[0]
        else:
            # flatten nested results into a flat sequence of literals
            flattened = _flatten(t.as_list())
            repeater << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(define_repeater, call_during_try=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # parse with a copy of expr, then verify the tokens match the earlier ones
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # capture the tokens matched by the original expression...
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # ...and require the repeat to produce exactly the same tokens
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fix: add missing space between "found" and the token list
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, call_during_try=True)

    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    **kwargs,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    :param strs: a string of space-delimited literals, or a collection of
       string literals
    :param caseless: treat all literals as caseless
    :param use_regex: bool - as an optimization, will
       generate a :class:`Regex` object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or
       ``as_keyword=True``, or if creating a :class:`Regex` raises an exception)
    :param as_keyword: bool - enforce :class:`Keyword`-style matching on the
       generated expressions

    Parameters ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
    compatibility, but will be removed in a future release.

    Example:

    .. testcode::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints:

    .. testoutput::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # merge deprecated camelCase kwargs with the PEP8 parameters
    useRegex: bool = deprecate_argument(kwargs, "useRegex", True)
    asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a str passed for caseless usually means the caller passed multiple
    # string args instead of one space-delimited string - warn if enabled
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            PyparsingDiagnosticWarning,
            stacklevel=2,
        )

    # comparison helpers: is_equal detects duplicates, masks detects when one
    # symbol is a prefix of (and would shadow) a longer one
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    # NOTE: dedupes equal symbols, and moves any longer symbol that a current
    # symbol would mask ahead of it; only advance i when no change was made
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags)
            ret.set_name(" | ".join(repr(s) for s in symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst",
                PyparsingDiagnosticWarning,
                stacklevel=2,
            )

    # last resort, just use MatchFirst of Token class corresponding to caseless
    # and asKeyword settings
    CASELESS = KEYWORD = True
    parse_element_class = {
        (CASELESS, KEYWORD): CaselessKeyword,
        (CASELESS, not KEYWORD): CaselessLiteral,
        (not CASELESS, KEYWORD): Keyword,
        (not CASELESS, not KEYWORD): Literal,
    }[(caseless, asKeyword)]
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
def dict_of(key: ParserElement, value: ParserElement) -> Dict:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example:

    .. doctest::

       >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
       >>> data_word = Word(alphas)
       >>> label = data_word + FollowedBy(':')
       >>> attr_expr = (
       ...     label
       ...     + Suppress(':')
       ...     + OneOrMore(data_word, stop_on=label)
       ...     .set_parse_action(' '.join))
       >>> print(attr_expr[1, ...].parse_string(text).dump())
       ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

       >>> attr_label = label
       >>> attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label
       ...     ).set_parse_action(' '.join)

       # similar to Dict, but simpler call format
       >>> result = dict_of(attr_label, attr_value).parse_string(text)
       >>> print(result.dump())
       [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
       - color: 'light blue'
       - posn: 'upper left'
       - shape: 'SQUARE'
       - texture: 'burlap'
       [0]:
         ['shape', 'SQUARE']
       [1]:
         ['posn', 'upper left']
       [2]:
         ['color', 'light blue']
       [3]:
         ['texture', 'burlap']

       >>> print(result['shape'])
       SQUARE
       >>> print(result.shape)  # object attribute access works too
       SQUARE
       >>> print(result.as_dict())
       {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}
    """
    # each dictionary entry is a grouped key/value pair; Dict maps them to names
    entry = Group(key + value)
    return Dict(OneOrMore(entry))
def original_text_for(
    expr: ParserElement, as_string: bool = True, **kwargs
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example:

    .. testcode::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints:

    .. testoutput::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    asString: bool = deprecate_argument(kwargs, "asString", True)
    asString = asString and as_string

    # bracket expr with markers that record the start and end locations
    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    wrapped = start_marker("_original_start") + expr + end_marker("_original_end")

    if asString:

        def extract_text(s, l, t):
            # return just the original slice of the input string
            return s[t._original_start : t._original_end]

    else:

        def extract_text(s, l, t):
            # keep named results, replacing list contents with the original slice
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    wrapped.set_parse_action(extract_text)
    wrapped.ignoreExprs = expr.ignoreExprs
    wrapped.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return wrapped
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    # promote the single inner token up one level
    converter = TokenConverter(expr)
    return converter.add_parse_action(lambda t: t[0])
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    .. deprecated:: 3.0.0
       Use the :class:`Located` class instead. Note that `Located`
       returns results with one less grouping level.

    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :meth:`ParserElement.parse_with_tabs`
    """
    warnings.warn(
        f"{'locatedExpr'!r} deprecated - use {'Located'!r}",
        PyparsingDeprecationWarning,
        stacklevel=2,
    )

    # zero-width marker whose parse action reports the current location
    loc_capture = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the trailing marker must not skip whitespace, or locn_end would overshoot
    trailing_loc = loc_capture.copy().leave_whitespace()
    return Group(
        loc_capture("locn_start") + expr("value") + trailing_loc("locn_end")
    )
# define special default value to permit None as a significant value for
# ignore_expr
# (a NoMatch instance used purely as an identity sentinel by nested_expr;
# it is never actually used for parsing)
_NO_IGNORE_EXPR_GIVEN = NoMatch()
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
    **kwargs,
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    :param opener: str - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression

    :param closer: str - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression

    :param content: expression for items within the nested lists

    :param ignore_expr: expression for ignoring opening and closing delimiters
       (default = :class:`quoted_string`)

    Parameter ``ignoreExpr`` is retained for compatibility
    but will be removed in a future release.

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example:

    .. testcode::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print(f"{func.name} ({func.type}) args: {func.args}")

    prints:

    .. testoutput::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    ignoreExpr: ParserElement = deprecate_argument(
        kwargs, "ignoreExpr", _NO_IGNORE_EXPR_GIVEN
    )
    # reconcile the deprecated ignoreExpr kwarg with ignore_expr: the new
    # parameter is used only when the deprecated one was not explicitly given
    # (i.e., it is still the identity sentinel)
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr  # type: ignore [assignment]

    # neither argument given - fall back to the documented default
    if ignoreExpr is _NO_IGNORE_EXPR_GIVEN:
        ignoreExpr = quoted_string()

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # build a default content expression: runs of characters that are not
        # delimiters and not whitespace (and not matched by ignoreExpr)
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters can simply be excluded from the char set
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    )
                else:
                    content = Combine(
                        Empty()
                        + CharsNotIn(
                            opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                        )
                    )
            else:
                # multi-char delimiters require explicit negative lookaheads
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )

        # for these internally-created context expressions, simulate whitespace-skipping
        if ParserElement.DEFAULT_WHITE_CHARS:
            content.set_parse_action(
                lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS)
            )

    # recursive definition: a nested list is opener + (items or nested lists) + closer
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            _suppression(opener)
            + ZeroOrMore(ignoreExpr | ret | content)
            + _suppression(closer)
        )
    else:
        ret <<= Group(
            _suppression(opener) + ZeroOrMore(ret | content) + _suppression(closer)
        )

    ret.set_name(f"nested {opener}{closer} expression")

    # don't override error message from content expressions
    ret.errmsg = None
    return ret
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name.

    Returns a tuple of (openTag, closeTag) expressions. ``xml=True``
    builds strict XML-style tags (case-sensitive names, quoted attribute
    values only); ``xml=False`` builds permissive HTML-style tags.

    NOTE: suppress_LT/suppress_GT defaults are module-level shared Suppress
    instances, evaluated once at definition time (intentional here).
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tag names match caselessly; XML tag names are case-sensitive
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # caller passed an expression; use its name for the results names
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing (empty) tag
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: allow quoted or unquoted attribute values, and valueless attrs
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are normalized to lowercase
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    openTag.tag = resname
    closeTag.tag = resname
    # convenience expression matching everything up to the matching close tag
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
def make_html_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example:

    .. testcode::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints:

    .. testoutput::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # delegate to the shared tag builder in permissive (HTML) mode
    return _makeTags(tag_str, xml=False)
def make_xml_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # delegate to the shared tag builder in strict (XML) mode
    return _makeTags(tag_str, xml=True)
# ready-to-use expressions matching any HTML open/close tag
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# HTML5 named entities, keyed without the trailing ';'
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# matches '&name;' for any known HTML entity, capturing the name as "entity";
# NOTE(review): a callable is passed as the pattern, presumably to defer
# building the large compressed alternation until first use - confirm that
# Regex supports lazy/callable patterns in this codebase
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")
def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # look up the captured entity name; returns None for unknown entities
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # operators at this level group left-to-right
    LEFT = 1
    # operators at this level group right-to-left
    RIGHT = 2
# the operator element of an infix_notation spec: a single expression (or a
# string, which gets promoted to a Literal), or - for ternary operators - a
# tuple of the two separating expressions
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level for infix_notation:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> Forward:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    :param base_expr: expression representing the most basic operand to
       be used in the expression
    :param op_list: list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(op_expr,
       num_operands, right_left_assoc, (optional)parse_action)``, where:

       - ``op_expr`` is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if ``num_operands``
         is 3, ``op_expr`` is a tuple of two expressions, for the two
         operators separating the 3 terms
       - ``num_operands`` is the number of terms for this operator (must be 1,
         2, or 3)
       - ``right_left_assoc`` is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
       - ``parse_action`` is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``set_parse_action(*fn)``
         (:class:`ParserElement.set_parse_action`)

    :param lpar: expression for matching left-parentheses; if passed as a
       str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
       an expression (such as ``Literal('(')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses; if passed as a
       str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
       an expression (such as ``Literal(')')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example:

    .. testcode::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    # strings for the parentheses get suppressed; expressions are kept as-is
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}_expression")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    arity: int
    rightLeftAssoc: opAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # build the precedence ladder from highest to lowest: each level's
    # expression is (this level's operations) | (next-higher level)
    for operDef in op_list:
        # pad 3-tuples with a None parse action
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            # ternary operators are defined by a pair of separator expressions
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # postfix unary: operand followed by one or more operators
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    # no operator expression: juxtaposition of operands
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    # right-assoc: recurse into thisExpr on the right side
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    .. deprecated:: 3.0.0
        Use the :class:`IndentedBlock` class instead. Note that `IndentedBlock`
        has a different method signature.

    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    :param blockStatementExpr: expression defining syntax of statement that
        is repeated within the indented block

    :param indentStack: list created by caller to manage indentation stack
        (multiple ``statementWithIndentedBlock`` expressions within a single
        grammar should share a common ``indentStack``)

    :param indent: boolean indicating whether block must be indented beyond
        the current level; set to ``False`` for block of left-most statements

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example:

    .. testcode::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints:

    .. testoutput::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # stacklevel=2 points the DeprecationWarning at the caller, not this frame
    warnings.warn(
        f"{'indentedBlock'!r} deprecated - use {'IndentedBlock'!r}",
        PyparsingDeprecationWarning,
        stacklevel=2,
    )
    # NOTE(review): the mutable default for `backup_stacks` appears intentional —
    # it acts as a module-lifetime registry of indentStack snapshots shared by
    # all calls that don't pass their own; snapshots are pushed here and popped
    # (or restored on failure) by the actions below. Confirm before "fixing".
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore indentStack to the most recent snapshot (used on parse failure)
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # at end-of-string there is nothing to check
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            # deeper than current level is illegal nesting; shallower means
            # this token is not a peer of the current block
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            # entering a more deeply indented sub-block: push the new level
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # at end-of-string there is nothing to check
        if l >= len(s):
            return
        curCol = col(l, s)
        # an unindent must land exactly on some previously seen indent level
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # NL consumes one or more line ends (tabs/spaces are the only whitespace
    # skipped here, so newlines themselves are significant)
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    # leading Empty() lets whitespace be skipped before the indent check runs
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # block must be indented past the current level: INDENT ... UNDENT
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # left-most statements: no INDENT required, UNDENT is optional
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    # (the `and None` keeps the popped value out of the parse results)
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    # on failure, roll indentStack back to the snapshot taken above
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # allow backslash-newline line continuations inside the block
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
# (?:[^*]|\*(?!/)) matches any char that is not the start of the closing */
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

# non-greedy [\s\S]*? spans newlines up to the first -->
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# leave_whitespace so the match starts at the current position, not after skipped ws
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# \\\n permits backslash-newline continuations inside a // comment
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# union of the c_style_comment and dbl_slash_comment patterns above
cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """
    .. deprecated:: 3.1.0
        Use the :class:`DelimitedList` class instead.
    """
    # Thin forwarding shim: build the replacement class with the same arguments.
    return DelimitedList(
        expr,
        delim,
        combine=combine,
        min=min,
        max=max,
        allow_trailing_delim=allow_trailing_delim,
    )
# Compatibility synonyms
# Pre-PEP8 camelCase aliases kept for backward compatibility with pyparsing 2.x
# client code. Plain assignments share the same object; replaced_by_pep8(...)
# additionally wraps the target to emit a deprecation warning when called.
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
# NOTE: this rebinding shadows the def of delimited_list above, routing callers
# through the deprecation wrapper instead
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on