Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/helpers.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

340 statements  

1# helpers.py 

2import html.entities 

3import operator 

4import re 

5import sys 

6import typing 

7 

8from . import __diag__ 

9from .core import * 

10from .util import ( 

11 _bslash, 

12 _flatten, 

13 _escape_regex_range_chars, 

14 make_compressed_re, 

15 replaced_by_pep8, 

16) 

17 

18 

19# 

20# global helpers 

21# 

def counted_array(
    expr: ParserElement, int_expr: typing.Optional[ParserElement] = None, **kwargs
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Examples:

    .. doctest::

        >>> counted_array(Word(alphas)).parse_string('2 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - In this parser, the leading integer value is given in binary,
      '10' indicating that 2 values are in the array:

    .. doctest::

        >>> binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        >>> counted_array(Word(alphas), int_expr=binary_constant
        ...     ).parse_string('10 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - If other fields must be parsed after the count but before the
      list items, give the fields results names and they will
      be preserved in the returned ParseResults:

    .. doctest::

        >>> ppc = pyparsing.common
        >>> count_with_metadata = ppc.integer + Word(alphas)("type")
        >>> typed_array = counted_array(Word(alphanums),
        ...     int_expr=count_with_metadata)("items")
        >>> result = typed_array.parse_string("3 bool True True False")
        >>> print(result.dump())
        ['True', 'True', 'False']
        - items: ['True', 'True', 'False']
        - type: 'bool'
    """
    # honor the deprecated pre-PEP8 keyword, if given
    legacy_int_expr: typing.Optional[ParserElement] = deprecate_argument(
        kwargs, "intExpr", None
    )
    count_expr = legacy_int_expr or int_expr

    # placeholder for the array contents; redefined at parse time once
    # the leading count has been seen
    contents = Forward()

    def bind_array_contents(s, l, t):
        # rebind the Forward to exactly `n` repetitions of expr
        nonlocal contents
        n = t[0]
        contents <<= Empty() if not n else (expr * n)
        # clear list contents, but keep any named results
        del t[:]

    if count_expr is None:
        # default count: a decimal integer
        count_expr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated
        count_expr = count_expr.copy()
    count_expr.set_name("arrayLen")
    count_expr.add_parse_action(bind_array_contents, call_during_try=True)
    return (count_expr + contents).set_name(f"(len) {expr}...")

92 

93 

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

    .. testcode::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        # rebind the repeater according to what `expr` just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # multiple/nested tokens: flatten and require each literal in order
            flattened = _flatten(t.as_list())
            rep << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep

129 

130 

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # remember the tokens matched by the original expression
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # the repeat must produce the same flattened token list
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # BUGFIX: added missing space between "found" and the token list
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, call_during_try=True)

    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep

167 

168 

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    **kwargs,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    :param strs: a string of space-delimited literals, or a collection of
      string literals
    :param caseless: treat all literals as caseless
    :param use_regex: bool - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or
      ``as_keyword=True``, or if creating a :class:`Regex` raises an exception)
    :param as_keyword: bool - enforce :class:`Keyword`-style matching on the
      generated expressions

    Parameters ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
    compatibility, but will be removed in a future release.

    Example:

    .. testcode::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints:

    .. testoutput::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # resolve deprecated pre-PEP8 keyword arguments against the new names
    useRegex: bool = deprecate_argument(kwargs, "useRegex", True)
    asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)

    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a str passed as `caseless` usually means the caller passed multiple
    # string args instead of one space-delimited string - warn if diagnostics on
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # comparison helpers: `is_equal` detects duplicate symbols,
    # `masks(a, b)` is True when symbol a is a prefix of (would mask) b
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    # NOTE: the loop mutates `symbols` in place; `i` only advances when no
    # duplicate/masking symbol was found after position i
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                # drop exact duplicate
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                # move the longer symbol ahead of the shorter prefix
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags)
            ret.set_name(" | ".join(repr(s) for s in symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst construction below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst of Token class corresponding to caseless
    # and asKeyword settings
    CASELESS = KEYWORD = True
    parse_element_class = {
        (CASELESS, KEYWORD): CaselessKeyword,
        (CASELESS, not KEYWORD): CaselessLiteral,
        (not CASELESS, KEYWORD): Keyword,
        (not CASELESS, not KEYWORD): Literal,
    }[(caseless, asKeyword)]
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

304 

305 

def dict_of(key: ParserElement, value: ParserElement) -> Dict:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example:

    .. doctest::

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = (
        ...     label
        ...     + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join))
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        >>> attr_label = label
        >>> attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label
        ...     ).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        >>> result = dict_of(attr_label, attr_value).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
            ['shape', 'SQUARE']
        [1]:
            ['posn', 'upper left']
        [2]:
            ['color', 'light blue']
        [3]:
            ['texture', 'burlap']

        >>> print(result['shape'])
        SQUARE
        >>> print(result.shape)  # object attribute access works too
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}
    """
    # each entry is a grouped key/value pair; Dict collects them by key
    entry = Group(key + value)
    return Dict(OneOrMore(entry))

361 

362 

def original_text_for(
    expr: ParserElement, as_string: bool = True, **kwargs
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example:

    .. testcode::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints:

    .. testoutput::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # honor the deprecated pre-PEP8 keyword, if given
    asString: bool = deprecate_argument(kwargs, "asString", True)
    asString = asString and as_string

    # markers that record the character offsets surrounding `expr`
    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    matchExpr = start_marker("_original_start") + expr + end_marker("_original_end")

    if asString:
        # replace all tokens with the raw slice of the input string
        def extractText(s, l, t):
            return s[t._original_start : t._original_end]
    else:
        # keep named results; replace list contents with the raw slice,
        # removing the internal marker names as we go
        def extractText(s, l, t):
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr

420 

421 

def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    # unwrap the single-level grouping by returning the first token
    unwrap_first = lambda t: t[0]
    return TokenConverter(expr).add_parse_action(unwrap_first)

427 

428 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    .. deprecated:: 3.0.0
       Use the :class:`Located` class instead. Note that `Located`
       returns results with one less grouping level.

    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :meth:`ParserElement.parse_with_tabs`
    """
    warnings.warn(
        f"{'locatedExpr'!r} deprecated - use {'Located'!r}",
        DeprecationWarning,
        stacklevel=2,
    )

    # zero-width marker whose parse action reports the current location
    mark_location = Empty().set_parse_action(lambda ss, ll, tt: ll)
    start_loc = mark_location("locn_start")
    # end marker must not skip leading whitespace, or the end offset drifts
    end_loc = mark_location.copy().leave_whitespace()("locn_end")
    return Group(start_loc + expr("value") + end_loc)

459 

460 

# define special default value to permit None as a significant value for
# ignore_expr
# (sentinel instance - nested_expr compares against it by identity to tell
# "argument omitted" apart from an explicit ignore_expr=None)
_NO_IGNORE_EXPR_GIVEN = NoMatch()

464 

465 

def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
    **kwargs,
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    :param opener: str - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression

    :param closer: str - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression

    :param content: expression for items within the nested lists

    :param ignore_expr: expression for ignoring opening and closing delimiters
      (default = :class:`quoted_string`)

    Parameter ``ignoreExpr`` is retained for compatibility
    but will be removed in a future release.

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example:

    .. testcode::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print(f"{func.name} ({func.type}) args: {func.args}")


    prints:

    .. testoutput::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile the deprecated `ignoreExpr` keyword with `ignore_expr`;
    # both default to the _NO_IGNORE_EXPR_GIVEN sentinel so that an
    # explicit None (meaning "ignore nothing") can be distinguished
    ignoreExpr: ParserElement = deprecate_argument(
        kwargs, "ignoreExpr", _NO_IGNORE_EXPR_GIVEN
    )

    if ignoreExpr != ignore_expr:
        # prefer whichever argument was actually supplied by the caller
        ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr  # type: ignore [assignment]

    if ignoreExpr is _NO_IGNORE_EXPR_GIVEN:
        # neither argument given - default to ignoring quoted strings
        ignoreExpr = quoted_string()

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # build a default content expression; only possible when opener and
        # closer are plain strings whose characters we can exclude
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters: exclude them character-by-character
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    )
                else:
                    content = Combine(
                        Empty()
                        + CharsNotIn(
                            opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                        )
                    )
            else:
                # multi-char delimiters: use negative lookahead on the literals
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )

    # for these internally-created context expressions, simulate whitespace-skipping
    if ParserElement.DEFAULT_WHITE_CHARS:
        content.set_parse_action(
            lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS)
        )

    # recursive definition: a nested expression may contain further nested
    # expressions, ignored expressions, or content, between the delimiters
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))

    ret.set_name(f"nested {opener}{closer} expression")

    # don't override error message from content expressions
    ret.errmsg = None
    return ret

622 

623 

def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name.

    ``tagStr`` may be a string (converted to a Keyword, caseless for HTML)
    or an existing ParserElement. Returns an ``(openTag, closeTag)`` pair.
    NOTE: the Suppress("<")/Suppress(">") defaults are evaluated once at
    definition time and shared by all calls - presumably intentional, as
    they are stateless suppressors.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tags match caselessly; XML tags are case-sensitive
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing tag; stored as bool in "empty"
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: values may be quoted or bare (any chars except ">"),
        # attribute names are lowercased, and the "=value" part is optional
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    # expose the tag name and a convenience body-skipping expression
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

681 

682 

def make_html_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example:

    .. testcode::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints:

    .. testoutput::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # HTML mode: delegate with xml=False (caseless tags, lax attribute values)
    return _makeTags(tag_str, xml=False)

712 

713 

def make_xml_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # XML mode: delegate with xml=True (case-sensitive tags, quoted values)
    return _makeTags(tag_str, xml=True)

723 

724 

# expressions matching any HTML open/close tag, not just one specific name
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (sans trailing ';') to their replacement characters
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# lazily-built regex (Regex accepts a callable here): the handful of most
# common entities are listed explicitly ahead of the compressed full map
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")

738 

739 

def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # look up the captured "entity" group; None if the name is unknown
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

743 

744 

class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # left-associative: a op b op c groups as (a op b) op c
    LEFT = 1
    # right-associative: a op b op c groups as a op (b op c)
    RIGHT = 2

751 

752 

# operator expression: a ParserElement, a string (converted to a Literal), or -
# for ternary operators - a pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level for infix_notation: (op_expr, num_operands, assoc[, parse_action])
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]

769 

770 

def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> Forward:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    :param base_expr: expression representing the most basic operand to
      be used in the expression
    :param op_list: list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)

    :param lpar: expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example:

    .. testcode::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    # normalize parenthesis arguments: plain strings are suppressed
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # parenthesized sub-expression recurses back into the full grammar
    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}_expression")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # precedence climbing: each level builds on `lastExpr` (the expression for
    # all tighter-binding levels processed so far)
    for operDef in op_list:
        # pad the spec tuple with None so the parse action is optional
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            # ternary operators take a pair of operator expressions
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # unary left-assoc: postfix operator, one or more repetitions
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    # no explicit operator: juxtaposition of operands
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    # right-assoc binds via recursion into thisExpr
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        # this level matches its operator form, or falls through to the
        # tighter-binding expression
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret

974 

975 

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    .. deprecated:: 3.0.0
        Use the :class:`IndentedBlock` class instead. Note that `IndentedBlock`
        has a difference method signature.

    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    :param blockStatementExpr: expression defining syntax of statement that
        is repeated within the indented block

    :param indentStack: list created by caller to manage indentation stack
        (multiple ``statementWithIndentedBlock`` expressions within a single
        grammar should share a common ``indentStack``)

    :param indent: boolean indicating whether block must be indented beyond
        the current level; set to ``False`` for block of left-most statements

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example:

    .. testcode::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints:

    .. testoutput::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    warnings.warn(
        f"{'indentedBlock'!r} deprecated - use {'IndentedBlock'!r}",
        DeprecationWarning,
        stacklevel=2,
    )

    # NOTE: the mutable default `backup_stacks=[]` is deliberate here - it acts
    # as a registry shared across all indentedBlock expressions, so that a
    # failed parse can restore the caller's indentStack to its saved state.
    # Snapshot the caller's indentation stack so it can be restored on failure.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore the caller-owned indentStack from the most recent snapshot.
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # Parse action: succeed only if the current statement starts in the
        # same column as the previous statements at this block level.
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # Parse action: require a column strictly greater than the current
        # indentation level, and push the new level onto the stack.
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # Parse action: require dedenting back to some previously-seen
        # indentation level, popping one level off the stack.
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # NL skips blank lines between statements (tabs/spaces only as whitespace,
    # so the newline itself is what LineEnd matches).
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # Block must be indented beyond the enclosing level: INDENT ... UNDENT.
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # Left-most block: no INDENT required, trailing UNDENT is optional.
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    # On parse failure, roll the shared indentStack back to the snapshot.
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # Allow backslash-continued lines inside the block statements.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

1128 

1129 

# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
# Regex notes: `(?:[^*]|\*(?!/))*` matches comment body chars, allowing '*'
# only when not immediately followed by '/'.
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

# `[\s\S]` matches any character including newlines (no re.DOTALL needed);
# the `*?` keeps the match non-greedy so back-to-back comments don't merge.
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# `\\\n` lets a '//' comment continue across backslash-escaped line endings.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# Union of the C-style and //-style comment patterns above.
cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

1159 

1160 

# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """
    Return a :class:`DelimitedList` expression matching one or more
    ``expr`` occurrences separated by ``delim``.

    .. deprecated:: 3.1.0
        Use the :class:`DelimitedList` class instead.
    """
    # Thin forwarding shim kept for backward compatibility; all argument
    # validation and behavior live in DelimitedList itself.
    list_expr = DelimitedList(
        expr, delim, combine, min, max, allow_trailing_delim=allow_trailing_delim
    )
    return list_expr

1178 

1179 

# Compatibility synonyms
# Pre-PEP8 camelCase names retained so legacy pyparsing 2.x grammars keep
# working. Plain assignments are simple aliases; names wrapped with
# replaced_by_pep8 additionally route through a deprecation shim (defined
# in .util) that points users at the snake_case/class replacement.
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on