1# helpers.py
2import html.entities
3import operator
4import re
5import sys
6import typing
7
8from . import __diag__
9from .core import *
10from .util import (
11 _bslash,
12 _flatten,
13 _escape_regex_range_chars,
14 make_compressed_re,
15 replaced_by_pep8,
16)
17
18
19#
20# global helpers
21#
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    Matches a pattern of the form::

        integer expr expr expr...

    where the leading integer gives how many ``expr`` terms follow.
    The returned tokens are the array of ``expr`` matches as a list;
    the leading count token is suppressed.

    ``int_expr`` may be supplied as a custom pyparsing expression that
    produces an integer value (for example, a binary or hex count).
    Results names defined on ``int_expr`` are preserved in the returned
    ParseResults, so extra metadata fields parsed along with the count
    remain accessible.

    ``intExpr`` is the pre-PEP8 spelling of ``int_expr``, retained for
    compatibility.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # leading count given in binary - '10' means 2 items follow
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']
    """
    # prefer the legacy keyword if given, else the PEP8 argument
    intExpr = intExpr or int_expr
    contents = Forward()

    # default count is a decimal integer; otherwise work on a copy so the
    # caller's expression is not modified
    if intExpr is None:
        count_expr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        count_expr = intExpr.copy()

    def size_the_array(s, l, t):
        # dynamically define the array body as exactly t[0] copies of expr
        count = t[0]
        contents <<= (expr * count) if count else Empty()
        # clear list contents, but keep any named results
        del t[:]

    count_expr.set_name("arrayLen")
    count_expr.add_parse_action(size_the_array, call_during_try=True)
    return (count_expr + contents).set_name(f"(len) {expr}...")
80
81
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Define an expression that matches a literal repeat of whatever a
    previous expression matched. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    matches ``"1:1"`` but not ``"1:2"``. Because the comparison is done
    on the literal matched text, this will also match the leading
    ``"1:1"`` in ``"1:10"``; if that is not desired, use
    :class:`match_previous_expr` instead. Do *not* use with packrat
    parsing enabled.
    """
    repeater = Forward()

    def record_matched_tokens(s, l, t):
        # rebind the repeater to literally match what was just parsed
        if not t:
            repeater << Empty()
        elif len(t) == 1:
            repeater << t[0]
        else:
            # multiple tokens - flatten and require each literal in order
            flattened = _flatten(t.as_list())
            repeater << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(record_matched_tokens, callDuringTry=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater
115
116
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # reparse with a copy of expr; a parse action then verifies that the
    # reparsed tokens equal the tokens matched by the original expression
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # bugfix: insert the missing space between "found" and the
                # token list in the exception message
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
151
152
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # reconcile PEP8 and legacy argument spellings (either may be used)
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a str passed for `caseless` suggests the caller passed multiple string
    # args positionally instead of one space-delimited string
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # comparison helpers used by the reordering pass below:
    # is_equal detects duplicate symbols, masks detects when a shorter
    # symbol would hide a longer one that starts with it
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parse_element_class = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)
        parse_element_class = Keyword if asKeyword else Literal

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                # duplicate of an earlier symbol - drop it
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                # `other` is longer and starts with `cur` - move it ahead of
                # `cur` so the longer alternative is tried first
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            # no change at position i - advance (a break restarts scanning at i)
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst fallback below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
277
278
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Define a dictionary-style parser from separate key and value
    expressions, wiring up the :class:`Dict`, :class:`OneOrMore`, and
    :class:`Group` wrappers in the proper order.

    The key pattern may include delimiting markers or punctuation as
    long as they are suppressed, leaving only the significant key text.
    The value pattern may contain named results, which then appear as
    named token fields in the resulting :class:`Dict`.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # each entry is a grouped key/value pair; Dict turns the repeated
    # groups into named results keyed by the key tokens
    entry = Group(key + value)
    return Dict(OneOrMore(entry))
317
318
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Return the original, untokenized text matched by ``expr``.

    Useful to restore the parsed fields of an HTML start tag into the
    raw tag text itself, or to revert separate tokens with intervening
    whitespace back to the original matching input text. By default the
    result is a string containing the original parsed text.

    If ``as_string`` is passed as ``False``, the return value is a
    :class:`ParseResults` containing any results names that were
    originally matched, plus a single token holding the original
    matched text. So if ``expr`` defines results names that you want to
    keep, set ``as_string=False``.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    asString = asString and as_string

    # record the match's start and end offsets via zero-width markers
    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    wrapped = start_marker("_original_start") + expr + end_marker("_original_end")

    if asString:

        def extract(s, l, t):
            # replace all tokens with the raw slice of the input string
            return s[t._original_start : t._original_end]

    else:

        def extract(s, l, t):
            # keep named results; pop the marker fields while slicing
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    wrapped.set_parse_action(extract)
    wrapped.ignoreExprs = expr.ignoreExprs
    wrapped.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return wrapped
370
371
def ungroup(expr: ParserElement) -> ParserElement:
    """Undo pyparsing's default grouping of And expressions, even if all
    but one are non-empty, by returning only the first token.
    """

    def take_first(t):
        return t[0]

    return TokenConverter(expr).add_parse_action(take_first)
377
378
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Decorate a returned token with its starting and ending locations in
    the input string.

    Adds these results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose parse action reports the current location
    loc_capture = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the trailing marker must not skip whitespace, so the end location
    # is the position just past the matched text
    end_capture = loc_capture.copy().leaveWhitespace()
    return Group(
        loc_capture("locn_start") + expr("value") + end_capture("locn_end")
    )
412
413
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # NOTE(review): both ignore_expr and ignoreExpr default to quoted_string()
    # instances created once at function-definition time; the test below
    # relies on ParserElement equality against a *fresh* quoted_string() to
    # detect "still the default" - confirm that equality semantics hold
    # before changing this reconciliation logic
    if ignoreExpr != ignore_expr:
        # prefer the PEP8 ignore_expr value only if ignoreExpr was left at its default
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # no content expression given - synthesize one from the delimiters
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-character delimiters: content is any run of
                # characters that are not delimiters or whitespace
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-character delimiters: use negative lookahead on
                # each delimiter before consuming one character at a time
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # the grammar is recursive: a group of opener + (ignored | nested | content)* + closer
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name(f"nested {opener}{closer} expression")
    # don't override error message from content expressions
    ret.errmsg = None
    return ret
545
546
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    ``tagStr`` may be a string (converted to a Keyword; caseless unless
    ``xml`` is true) or a ParserElement. ``xml`` selects strict XML
    matching (double-quoted attribute values only, case-sensitive) vs.
    lenient HTML matching (quoted or unquoted values, caseless attribute
    names). Returns an (openTag, closeTag) pair.

    The Suppress defaults are created once at definition time and shared
    across calls; they carry no state, so sharing is safe.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tags match caselessly; XML tags match exactly
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # record whether this is a self-closing ("empty") tag
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: values may be quoted or unquoted (any chars except '>'),
        # and attributes may appear with no value at all
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are normalized to lowercase
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    # give the close tag an end<tagname> results name to mirror the open tag
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    openTag.tag = resname
    closeTag.tag = resname
    # convenience expression for "everything up to the matching close tag"
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
603
604
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> tuple[ParserElement, ParserElement]:
    """Construct opening and closing tag expressions for HTML, given a
    tag name. Matches tags in either upper or lower case, attributes
    with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects the lenient, caseless HTML rules
    return _makeTags(tag_str, xml=False)
630
631
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> tuple[ParserElement, ParserElement]:
    """Construct opening and closing tag expressions for XML, given a
    tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # xml=True selects strict, case-sensitive XML rules
    return _makeTags(tag_str, xml=True)
641
642
# module-level expressions matching any HTML open/close tag
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (with the trailing ';' stripped) to their
# replacement characters
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# the most frequently seen entities, listed first as regex alternatives
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# NOTE(review): the pattern is supplied as a callable - presumably so the
# large compressed entity regex is built lazily; confirm against Regex's
# support for deferred/callable patterns
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")
656
657
def replace_html_entity(s, l, t):
    """Parse action that maps a matched HTML entity (the ``entity``
    results name) to its replacement character; returns ``None`` for
    unknown entities, leaving the tokens unchanged."""
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
661
662
class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # left-associative: a op b op c parses as ((a op b) op c)
    LEFT = 1
    # right-associative: a op b op c parses as (a op (b op c))
    RIGHT = 2
669
670
# a single operator expression: a ParserElement, a string (converted to a
# Literal), or - for ternary operators - a pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level passed to infix_notation:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
687
688
def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # parenthesized sub-expression (renamed from `nested_expr` to avoid
    # shadowing the module-level nested_expr helper)
    paren_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(paren_expr)
    else:
        lastExpr = base_expr | paren_expr

    arity: int
    # annotate with the OpAssoc enum defined above (was legacy `opAssoc`)
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # build precedence levels from tightest-binding outward; each level's
    # operand is the expression built for the previous level
    for operDef in op_list:
        # pad the spec tuple so the optional parse action defaults to None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    # no operator expression: simple juxtaposition
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        # consistency fix: use PEP8 set_name (was legacy setName)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret
884
885
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): backup_stacks=[] is a mutable default shared across all
    # calls - presumably intentional here so every indentedBlock shares one
    # backup list, but confirm before refactoring
    # snapshot the caller's indent stack so it can be restored on failure
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # restore the indent stack from the most recent snapshot
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # statement must start at the current indent level exactly
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # entering a block: column must be deeper than the current level
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # leaving a block: column must match some enclosing indent level
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # newline handling: swallow line ends (tabs/spaces only as whitespace)
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # block must be indented beyond the current level
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # left-most statements: no INDENT required, UNDENT optional
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # allow backslash-continued lines inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
1027
1028
# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
# (?:[^*]|\*(?!/)) consumes any char that is not the start of the closing */
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

# [\s\S] matches any character including newlines; *? keeps the match non-greedy
# so back-to-back comments are matched separately
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# leave_whitespace() so leading spaces/tabs are kept as part of the match
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# (?:\\\n|[^\n])* allows a backslash-newline to continue the comment onto
# the next line, as in C/C++ line continuations
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# union of the c_style_comment and dbl_slash_comment patterns above
cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
# (collects every ParserElement defined at module level up to this point)
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
1058
1059
1060# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)

    Thin compatibility shim: forwards all arguments unchanged to the
    :class:`DelimitedList` replacement and returns the resulting expression.
    """
    return DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )
1074
1075
# Compatibility synonyms
# fmt: off
# Pre-PEP8 camelCase names kept for backward compatibility. Plain assignments
# are simple aliases; replaced_by_pep8(...) wraps the target so that use of the
# old name can emit a deprecation warning while delegating to the new one.
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
# NOTE: this deliberately rebinds the deprecated delimited_list() function
# defined earlier in this module to a deprecation wrapper around DelimitedList.
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on