Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/helpers.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

344 statements  

1# helpers.py 

2import html.entities 

3import operator 

4import re 

5import sys 

6import typing 

7 

8from . import __diag__ 

9from .core import * 

10from .util import ( 

11 _bslash, 

12 _flatten, 

13 _escape_regex_range_chars, 

14 make_compressed_re, 

15 replaced_by_pep8, 

16) 

17 

18 

def _suppression(expr: Union[ParserElement, str]) -> ParserElement:
    """Wrap *expr* in a :class:`Suppress`, unless it already is one.

    Internal helper - prevents stacking ``Suppress(Suppress(...))`` when
    building delimiter expressions.
    """
    return expr if isinstance(expr, Suppress) else Suppress(expr)

24 

25 

26# 

27# global helpers 

28# 

def counted_array(
    expr: ParserElement, int_expr: typing.Optional[ParserElement] = None, **kwargs
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Examples:

    .. doctest::

        >>> counted_array(Word(alphas)).parse_string('2 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - In this parser, the leading integer value is given in binary,
      '10' indicating that 2 values are in the array:

    .. doctest::

        >>> binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        >>> counted_array(Word(alphas), int_expr=binary_constant
        ...     ).parse_string('10 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - If other fields must be parsed after the count but before the
      list items, give the fields results names and they will
      be preserved in the returned ParseResults:

    .. doctest::

        >>> ppc = pyparsing.common
        >>> count_with_metadata = ppc.integer + Word(alphas)("type")
        >>> typed_array = counted_array(Word(alphanums),
        ...     int_expr=count_with_metadata)("items")
        >>> result = typed_array.parse_string("3 bool True True False")
        >>> print(result.dump())
        ['True', 'True', 'False']
        - items: ['True', 'True', 'False']
        - type: 'bool'
    """
    # pre-PEP8 keyword argument accepted for backward compatibility
    intExpr: typing.Optional[ParserElement] = deprecate_argument(
        kwargs, "intExpr", None
    )

    intExpr = intExpr or int_expr
    # placeholder for the body of the array; it gets (re)defined at parse
    # time, once the leading count has actually been parsed
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # called when the count field matches: rebind array_expr to exactly
        # n copies of expr (or Empty for a zero-length array)
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count field: a run of digits converted to int
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated by the parse action
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True so array_expr is rebound even during lookaheads
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")

99 

100 

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

    .. testcode::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        # Rebind the repeater so it literally matches whatever tokens the
        # source expression just produced.
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # multiple tokens - flatten any nesting and require each one
            # to appear literally, in order
            flattened = _flatten(t.as_list())
            rep << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep

136 

137 

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # parse with a copy of the original expression, then verify the tokens
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # capture the tokens matched by the source expression
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # compare the repeat's tokens against the captured ones
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fix: insert the missing space between "found" and the
                # token list in the error message
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, call_during_try=True)

    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep

174 

175 

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    **kwargs,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    :param strs: a string of space-delimited literals, or a collection of
       string literals
    :param caseless: treat all literals as caseless
    :param use_regex: bool - as an optimization, will
       generate a :class:`Regex` object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or
       ``as_keyword=True``, or if creating a :class:`Regex` raises an exception)
    :param as_keyword: bool - enforce :class:`Keyword`-style matching on the
       generated expressions

    Parameters ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
    compatibility, but will be removed in a future release.

    Example:

    .. testcode::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints:

    .. testoutput::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # pre-PEP8 keyword arguments accepted for backward compatibility
    useRegex: bool = deprecate_argument(kwargs, "useRegex", True)
    asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)

    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a str passed as `caseless` usually means the caller passed several
    # strings positionally instead of one space-delimited string
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            PyparsingDiagnosticWarning,
            stacklevel=2,
        )

    # comparison helpers used by the masking-reorder pass below
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    # NOTE: duplicates are dropped; a longer symbol that starts with a shorter
    # one is moved ahead of it so MatchFirst tries the longer match first
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            # no change made at position i - advance
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags)
            ret.set_name(" | ".join(repr(s) for s in symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst construction below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst",
                PyparsingDiagnosticWarning,
                stacklevel=2,
            )

    # last resort, just use MatchFirst of Token class corresponding to caseless
    # and asKeyword settings
    CASELESS = KEYWORD = True
    parse_element_class = {
        (CASELESS, KEYWORD): CaselessKeyword,
        (CASELESS, not KEYWORD): CaselessLiteral,
        (not CASELESS, KEYWORD): Keyword,
        (not CASELESS, not KEYWORD): Literal,
    }[(caseless, asKeyword)]
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

314 

315 

def dict_of(key: ParserElement, value: ParserElement) -> Dict:
    """Define a dictionary parser from separate key and value patterns.

    Builds the :class:`Dict`, :class:`ZeroOrMore`, and :class:`Group`
    wrappers in the proper order, so callers only supply the two patterns.
    The key pattern may include delimiting markers or punctuation, as long
    as they are suppressed, leaving only the significant key text. The
    value pattern may include named results, so that the :class:`Dict`
    results can include named token fields.

    Example:

    .. doctest::

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = (
        ...     label
        ...     + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...          .set_parse_action(' '.join))
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        >>> attr_label = label
        >>> attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label
        ...     ).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        >>> result = dict_of(attr_label, attr_value).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        >>> print(result['shape'])
        SQUARE
        >>> print(result.shape)  # object attribute access works too
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}
    """
    # one grouped key/value pair, repeated, collected into a Dict
    kv_pair = Group(key + value)
    return Dict(OneOrMore(kv_pair))

371 

372 

def original_text_for(
    expr: ParserElement, as_string: bool = True, **kwargs
) -> ParserElement:
    """Return the original, untokenized text matched by an expression.

    Useful to restore the parsed fields of an HTML start tag into the raw
    tag text itself, or to revert separate tokens with intervening
    whitespace back to the original matching input text. By default,
    returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as ``False``, then the
    return value is a :class:`ParseResults` containing any results names
    that were originally matched, and a single token containing the
    original matched text from the input string. So if the expression
    passed to :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you want to
    preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example:

    .. testcode::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints:

    .. testoutput::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # pre-PEP8 keyword argument accepted for backward compatibility
    asString: bool = deprecate_argument(kwargs, "asString", True)
    asString = asString and as_string

    # markers that record the input locations just before and after expr
    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    wrapped = start_marker("_original_start") + expr + end_marker("_original_end")

    if asString:
        def restore_original(s, l, t):
            # return the raw input slice between the recorded locations
            return s[t._original_start : t._original_end]
    else:
        def restore_original(s, l, t):
            # keep named results; replace list contents with the raw slice
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    wrapped.set_parse_action(restore_original)
    wrapped.ignoreExprs = expr.ignoreExprs
    wrapped.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return wrapped

430 

431 

def ungroup(expr: ParserElement) -> ParserElement:
    """Undo pyparsing's default grouping of And expressions, even if all
    but one are non-empty.
    """

    def take_first(t):
        # replace the grouped results with their single inner element
        return t[0]

    return TokenConverter(expr).add_parse_action(take_first)

437 

438 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    .. deprecated:: 3.0.0
       Use the :class:`Located` class instead. Note that `Located`
       returns results with one less grouping level.

    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :meth:`ParserElement.parse_with_tabs`
    """
    warnings.warn(
        f"{'locatedExpr'!r} deprecated - use {'Located'!r}",
        PyparsingDeprecationWarning,
        stacklevel=2,
    )

    # zero-width expression whose parse action yields the current location
    loc_capture = Empty().set_parse_action(lambda ss, ll, tt: ll)
    start = loc_capture("locn_start")
    # end marker must not skip leading whitespace, or it would overshoot
    end = loc_capture.copy().leave_whitespace()("locn_end")
    return Group(start + expr("value") + end)

469 

470 

# define special default value to permit None as a significant value for
# ignore_expr
# (a sentinel is needed because nested_expr treats None as "ignore nothing",
# while "argument omitted" should default to quoted_string)
_NO_IGNORE_EXPR_GIVEN = NoMatch()

474 

475 

def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
    **kwargs,
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    :param opener: str - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression

    :param closer: str - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression

    :param content: expression for items within the nested lists

    :param ignore_expr: expression for ignoring opening and closing delimiters
       (default = :class:`quoted_string`)

    Parameter ``ignoreExpr`` is retained for compatibility
    but will be removed in a future release.

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example:

    .. testcode::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print(f"{func.name} ({func.type}) args: {func.args}")


    prints:

    .. testoutput::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # pre-PEP8 keyword argument accepted for backward compatibility
    ignoreExpr: ParserElement = deprecate_argument(
        kwargs, "ignoreExpr", _NO_IGNORE_EXPR_GIVEN
    )

    # reconcile the old and new argument names: prefer whichever one the
    # caller actually supplied (the sentinel marks "not given")
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr  # type: ignore [assignment]

    # neither argument given - default to ignoring quoted strings
    if ignoreExpr is _NO_IGNORE_EXPR_GIVEN:
        ignoreExpr = quoted_string()

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # build a default content expression from the delimiters
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-character delimiters: content is any run of
                # characters that are not delimiters or whitespace
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    )
                else:
                    content = Combine(
                        Empty()
                        + CharsNotIn(
                            opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                        )
                    )
            else:
                # multi-character delimiters: must test for the full
                # delimiter strings with negative lookahead
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )

    # for these internally-created content expressions, simulate whitespace-skipping
    if ParserElement.DEFAULT_WHITE_CHARS:
        content.set_parse_action(
            lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS)
        )

    # recursive definition: a nested expression may contain further
    # nested expressions between its delimiters
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            _suppression(opener)
            + ZeroOrMore(ignoreExpr | ret | content)
            + _suppression(closer)
        )
    else:
        ret <<= Group(
            _suppression(opener) + ZeroOrMore(ret | content) + _suppression(closer)
        )

    ret.set_name(f"nested {opener}{closer} expression")

    # don't override error message from content expressions
    ret.errmsg = None
    return ret

636 

637 

def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name.

    ``xml=True`` builds strict XML tags (case-sensitive, double-quoted
    attribute values only); ``xml=False`` builds lenient HTML tags
    (caseless, quoted or unquoted attribute values, lowercased attribute
    names).
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tag names match caselessly; XML tag names are exact
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing (empty) tag
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: attribute values may be quoted or any unquoted run of
        # printables (excluding the closing ">")
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are normalized to lowercase;
                        # the "=value" part is optional (boolean attributes)
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    # expose the tag name and a convenience body-matcher on the returned
    # expressions
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

695 

696 

def make_html_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Construct opening and closing tag expressions for HTML, given a
    tag name. Matches tags in either upper or lower case, attributes with
    namespaces and with quoted or unquoted values.

    Example:

    .. testcode::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints:

    .. testoutput::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects the lenient, caseless HTML tag grammar
    return _makeTags(tag_str, xml=False)

726 

727 

def make_xml_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Construct opening and closing tag expressions for XML, given a
    tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # xml=True selects the strict, case-sensitive XML tag grammar
    return _makeTags(tag_str, xml=True)

737 

738 

# expressions matching any HTML open/close tag, regardless of tag name
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map entity name (without trailing ";") -> replacement character
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# alternation of the most frequently used entities, listed first in the
# regex so they match without scanning the full compressed entity list
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# pattern is built lazily (lambda) so the compressed entity regex is only
# generated when the expression is first used
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")

752 

753 

def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # unknown entities yield None, which leaves the match text unchanged
    try:
        return _htmlEntityMap[t.entity]
    except KeyError:
        return None

757 

758 

class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # left-associative: a OP b OP c parses as (a OP b) OP c
    LEFT = 1
    # right-associative: a OP b OP c parses as a OP (b OP c)
    RIGHT = 2

765 

766 

# operator expression for one precedence level: a single expression/str,
# or (for ternary operators) a pair of expressions/strs
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one entry of infix_notation's op_list:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]

783 

784 

def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> Forward:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    :param base_expr: expression representing the most basic operand to
       be used in the expression
    :param op_list: list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(op_expr,
       num_operands, right_left_assoc, (optional)parse_action)``, where:

       - ``op_expr`` is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if ``num_operands``
         is 3, ``op_expr`` is a tuple of two expressions, for the two
         operators separating the 3 terms
       - ``num_operands`` is the number of terms for this operator (must be 1,
         2, or 3)
       - ``right_left_assoc`` is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
       - ``parse_action`` is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``set_parse_action(*fn)``
         (:class:`ParserElement.set_parse_action`)

    :param lpar: expression for matching left-parentheses; if passed as a
       str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
       an expression (such as ``Literal('(')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses; if passed as a
       str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
       an expression (such as ``Literal(')')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example:

    .. testcode::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    # string parens are suppressed; expression parens are kept in results
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}_expression")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # build the precedence tower bottom-up: each operator level wraps the
    # expression built from all tighter-binding levels (lastExpr)
    for operDef in op_list:
        # pad with None so a 3-tuple spec yields pa=None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        # the lookahead avoids expensive Group construction when this
        # operator level cannot match at the current location
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # postfix unary: operand followed by one or more operators
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    # no operator expression: plain juxtaposition
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # prefix unary; recurse via thisExpr for right associativity
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret

988 

989 

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    .. deprecated:: 3.0.0
        Use the :class:`IndentedBlock` class instead. Note that `IndentedBlock`
        has a different method signature.

    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    :param blockStatementExpr: expression defining syntax of statement that
        is repeated within the indented block

    :param indentStack: list created by caller to manage indentation stack
        (multiple ``statementWithIndentedBlock`` expressions within a single
        grammar should share a common ``indentStack``)

    :param indent: boolean indicating whether block must be indented beyond
        the current level; set to ``False`` for block of left-most statements

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example:

    .. testcode::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints:

    .. testoutput::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    warnings.warn(
        f"{'indentedBlock'!r} deprecated - use {'IndentedBlock'!r}",
        PyparsingDeprecationWarning,
        stacklevel=2,
    )

    # NOTE(review): ``backup_stacks=[]`` is a mutable default, so all calls
    # that omit the argument share one list — presumably intentional here so
    # that nested indentedBlock expressions can restore a common snapshot on
    # parse failure, but confirm before changing.
    # Snapshot the current indent stack so a failed parse can roll it back.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore the indent stack from the most recent snapshot (fail action).
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # At end of input there is nothing left to indent-check.
        if l >= len(s):
            return
        curCol = col(l, s)
        # A peer statement must start at exactly the current indent level;
        # deeper is "illegal nesting", shallower is "not a peer entry".
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        # A sub-block must be indented strictly deeper than the current level;
        # push the new level onto the stack.
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # End of input implicitly closes the block.
        if l >= len(s):
            return
        curCol = col(l, s)
        # An unindent must return to some previously-seen indent level.
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        # Pop only when actually dedenting past the current level.
        if curCol < indentStack[-1]:
            indentStack.pop()

    # Newline(s) between statements; tabs/spaces only, so indentation of the
    # following line is left for the column checks above.
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    # INDENT/PEER/UNDENT are zero-width markers whose parse actions enforce
    # the column discipline against ``indentStack``.
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # Block must be indented beyond the current level.
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # Left-most statements: no INDENT required, UNDENT optional.
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    # On parse failure, roll the indent stack back to the saved snapshot.
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # Allow backslash line-continuations inside block statements.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

1142 

1143 

# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Matches everything up to (but not including) the next newline; whitespace
# is not skipped, so leading spaces are part of the match.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# The ``\\\n`` alternative lets a ``//`` comment continue across a
# backslash-newline line continuation.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"

1167 

# Snapshot every ParserElement defined so far in this module, so the
# built-in expressions can be revisited later if a global default changes.
# NOTE: ``vars()`` is the outermost iterable of the comprehension, so it is
# evaluated in module scope (not the comprehension's own scope).
_builtin_exprs: list[ParserElement] = [
    expr for expr in vars().values() if isinstance(expr, ParserElement)
]

1173 

1174 

# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """
    .. deprecated:: 3.1.0
        Use the :class:`DelimitedList` class instead.
    """
    # Thin shim: forward every argument, by keyword, to the replacement class.
    return DelimitedList(
        expr,
        delim=delim,
        combine=combine,
        min=min,
        max=max,
        allow_trailing_delim=allow_trailing_delim,
    )

1192 

1193 

# Compatibility synonyms
# Pre-PEP8 camelCase names for the snake_case definitions above. The plain
# assignments are simple aliases; the ``replaced_by_pep8(...)`` ones are
# wrappers built by .util.replaced_by_pep8 — presumably they emit a
# deprecation warning on use; confirm in util.replaced_by_pep8.
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
# NOTE: this rebinds the ``delimited_list`` compatibility function defined
# earlier in this module; after this line the wrapper object is what callers get.
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on