Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/helpers.py: 29%

317 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:51 +0000

1# helpers.py 

2import html.entities 

3import re 

4import sys 

5import typing 

6 

7from . import __diag__ 

8from .core import * 

9from .util import ( 

10 _bslash, 

11 _flatten, 

12 _escape_regex_range_chars, 

13 replaced_by_pep8, 

14) 

15 

16 

17# 

18# global helpers 

19# 

def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    Matches patterns of the form::

        integer expr expr expr...

    where the leading integer gives how many ``expr`` terms follow. The
    returned tokens contain only the array elements; the count token
    itself is suppressed (though any results names set on it survive).

    Pass ``int_expr`` to supply a custom counting expression; it must
    produce an integer value via a parse action. ``intExpr`` is the
    pre-PEP8 spelling of the same argument.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # count given in binary: '10' means 2 items follow
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']
    """
    counter = intExpr or int_expr
    contents = Forward()

    def _bind_count(s, l, t):
        # rebind the Forward to exactly `count` repetitions of expr
        # (re-evaluated on every match, including lookahead tries)
        nonlocal contents
        count = t[0]
        contents <<= (expr * count) if count else Empty()
        # clear list contents, but keep any named results
        del t[:]

    counter = (
        Word(nums).set_parse_action(lambda t: int(t[0]))
        if counter is None
        else counter.copy()
    )
    counter.set_name("arrayLen")
    counter.add_parse_action(_bind_count, call_during_try=True)
    return (counter + contents).set_name(f"(len) {expr}...")

78 

79 

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that matches a literal repeat of
    whatever a previous expression matched. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because the repeat is
    matched as a literal, this will also match the leading ``"1:1"``
    in ``"1:10"``; if that is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def record_matched_tokens(s, l, t):
        # rebind the repeater to exactly the token(s) just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten nested results into a flat sequence of literals
            flat = _flatten(t.as_list())
            rep << And(Literal(tok) for tok in flat)

    expr.add_parse_action(record_matched_tokens, callDuringTry=True)
    rep.set_name(f"(prev) {expr}")
    return rep

111 

112 

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # capture the tokens just matched by `expr`...
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # ...and require the repeated match to produce exactly the same tokens
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # bug fix: original message read "found{theseTokens}" with no space
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep

147 

148 

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # merge pre-PEP8 and PEP8 spellings: either spelling can enable keyword
    # matching; either spelling can disable the regex optimization
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # `other` duplicates `cur` - drop the duplicate
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    # `other` is a longer symbol that starts with `cur`;
                    # move it in front of `cur` to keep longest-first order
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                # no conflict found for `cur` - advance to the next symbol
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst fallback below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

273 

274 

def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`OneOrMore`, and :class:`Group`
    tokens in the proper order. The key pattern can include delimiting
    markers or punctuation, as long as they are suppressed, thereby
    leaving the significant key text. The value pattern can include
    named results, so that the :class:`Dict` results can include named
    token fields.

    Example::

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result['shape'])
        print(result.shape)      # object attribute access works too
        print(result.as_dict())
    """
    entry = Group(key + value)
    return Dict(OneOrMore(entry))

313 

314 

def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text.

    By default the wrapped expression returns a single string. Pass
    ``as_string=False`` to instead get a :class:`ParseResults` holding
    the original matched text plus any results names that were set
    inside *expr* (required if you want to keep those names).

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # either spelling set to False selects the ParseResults form
    use_string = asString and as_string

    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    matched = start_marker("_original_start") + expr + end_marker("_original_end")

    if use_string:

        def restore_original(s, l, t):
            # replace all tokens with the raw slice of the input string
            return s[t._original_start : t._original_end]

    else:

        def restore_original(s, l, t):
            # keep named results, but make the raw slice the sole token;
            # pop() consumes the internal marker names as a side effect
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matched.set_parse_action(restore_original)
    matched.ignoreExprs = expr.ignoreExprs
    matched.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matched

366 

367 

def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty. The wrapped expression's parse
    action keeps only the first token of each match.
    """
    unwrap = TokenConverter(expr)
    return unwrap.add_parse_action(lambda toks: toks[0])

373 

374 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    marker = Empty().set_parse_action(lambda s, loc, toks: loc)
    # the end marker must not skip whitespace before reporting its location
    end_marker = marker.copy().leaveWhitespace()
    body = marker("locn_start") + expr("value") + end_marker("locn_end")
    return Group(body)

408 

409 

def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile pre-PEP8 and PEP8 spellings: if the legacy ``ignoreExpr``
    # still holds its default, defer to ``ignore_expr``; otherwise the
    # explicitly-passed legacy value wins
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression from the delimiter strings
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                if ignoreExpr is not None:
                    # single-char delimiters: content is any run of characters
                    # that are not a delimiter or whitespace, scanned one
                    # character at a time so ignored expressions are skipped
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-char delimiters: each position must also be tested
                # against the opener/closer literals themselves
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # recursive grammar: a nested expression is a delimited, possibly empty
    # sequence of ignored expressions, nested expressions, or content
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret

539 

540 

def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name

    ``xml=True`` builds strict tags (case-sensitive name, double-quoted
    attribute values only); ``xml=False`` builds permissive HTML tags
    (caseless name, quoted or unquoted values, attribute names lowercased,
    value optional). Returns an ``(openTag, closeTag)`` pair.
    NOTE(review): the ``Suppress`` default arguments are evaluated once at
    import time and shared across all calls — presumably intentional since
    they are stateless suppressors; verify before changing.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # a ParserElement was passed in; use its name for the results names
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted; quotes are stripped
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: values may be quoted or bare (any printables except '>');
        # attribute names are lowercased and the '=value' part is optional
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    # convenience expression for "everything up to the matching close tag"
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

597 

598 

def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Returns a 2-tuple of ``(open_tag_expr, close_tag_expr)``.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    return _makeTags(tag_str, False)

624 

625 

def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Returns a 2-tuple of ``(open_tag_expr, close_tag_expr)``.

    Example: similar to :class:`make_html_tags`
    """
    return _makeTags(tag_str, True)

635 

636 

# expressions matching any well-formed HTML open/close tag; the tag name
# expression accepts letters followed by alphanumerics, '_' or ':'
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (with the trailing ';' stripped) to their
# replacement characters, e.g. "amp" -> "&"
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# matches any entity from the map (e.g. "&amp;"); the bare entity name is
# captured under the results name "entity"
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)

647 

648 

def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # look up the captured "entity" results name; unknown names yield None
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

652 

653 

class OpAssoc(Enum):
    """Enumeration of operator associativity (left- or right-associative)
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    LEFT = 1
    RIGHT = 2

660 

661 

# an operator for infix_notation may be given as an expression, a literal
# string, or (for ternary operators) a pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level: (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]

678 

679 

def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    # (bug fix: the original tested rpar twice, so a non-suppressed lpar
    # combined with a suppressed rpar skipped the grouping)
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    arity: int
    rightLeftAssoc: OpAssoc  # fixed annotation (was lowercase legacy alias)
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    for i, operDef in enumerate(op_list):
        # pad a 3-tuple spec out to 4 elements; parse action defaults to None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} term"
        else:
            term_name = f"{opExpr} term"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        # consistency fix: use PEP8 set_name (original used legacy setName)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

855 

856 

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): the mutable default `backup_stacks=[]` appears intentional -
    # it persists snapshots of indentStack across calls so a failed parse can
    # restore the stack (see reset_stack) - confirm before changing.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore indentStack to the most recent snapshot (invoked as this
        # expression's fail action below).
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # Parse action: succeed only when the current column equals the current
        # indentation level; deeper is "illegal nesting", shallower fails as
        # "not a peer entry". A location at/past end-of-string is accepted.
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # Parse action: require a strictly deeper indent than the current level,
        # and push the new level onto the stack.
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # Parse action: the current column must be some previously recorded
        # indent level; pop the stack when we have stepped back out one level.
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # Newlines are consumed (suppressed) but tabs/spaces are left so that the
    # col() checks above see the true indentation column.
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # Standard case: block must open with a deeper indent and close with
        # a matching unindent.
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # Left-most statements: no INDENT required, UNDENT is optional.
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # Allow statements to be continued across lines with a trailing backslash.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

998 

999 

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# rest_of_line keeps leading whitespace (leave_whitespace) so it captures the
# remainder of the current line exactly as written.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# the (?:\\\n|[^\n]) alternative lets a // comment continue across a
# backslash-escaped newline
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshots every ParserElement defined at module level up to this point)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

1030 

1031 

# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)"""
    # Thin forwarding shim: every argument is passed straight through to
    # DelimitedList, spelled as keywords for readability.
    return DelimitedList(
        expr,
        delim=delim,
        combine=combine,
        min=min,
        max=max,
        allow_trailing_delim=allow_trailing_delim,
    )

1046 

1047 

# pre-PEP8 compatible names
# These aliases preserve the historical camelCase public API; new code should
# prefer the snake_case / class-based names they point to.
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment

# Deprecated callables: replaced_by_pep8 supplies the implementation (delegating
# to the PEP8-named target) - the stub bodies below are intentionally empty.
@replaced_by_pep8(DelimitedList)
def delimitedList(): ...

# NOTE(review): this rebinds delimited_list, shadowing the compatibility
# function defined earlier in this module - confirm which definition is
# intended to win.
@replaced_by_pep8(DelimitedList)
def delimited_list(): ...

@replaced_by_pep8(counted_array)
def countedArray(): ...

@replaced_by_pep8(match_previous_literal)
def matchPreviousLiteral(): ...

@replaced_by_pep8(match_previous_expr)
def matchPreviousExpr(): ...

@replaced_by_pep8(one_of)
def oneOf(): ...

@replaced_by_pep8(dict_of)
def dictOf(): ...

@replaced_by_pep8(original_text_for)
def originalTextFor(): ...

@replaced_by_pep8(nested_expr)
def nestedExpr(): ...

@replaced_by_pep8(make_html_tags)
def makeHTMLTags(): ...

@replaced_by_pep8(make_xml_tags)
def makeXMLTags(): ...

@replaced_by_pep8(replace_html_entity)
def replaceHTMLEntity(): ...

@replaced_by_pep8(infix_notation)
def infixNotation(): ...
# fmt: on