Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/helpers.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

344 statements  

1# helpers.py 

2import html.entities 

3import operator 

4import re 

5import sys 

6import typing 

7 

8from . import __diag__ 

9from .core import * 

10from .util import ( 

11 _bslash, 

12 _flatten, 

13 _escape_regex_range_chars, 

14 make_compressed_re, 

15 replaced_by_pep8, 

16) 

17 

18 

def _suppression(expr: Union[ParserElement, str]) -> ParserElement:
    """Wrap ``expr`` in a :class:`Suppress`, unless it already is one.

    Internal helper; keeps ``Suppress`` from being nested inside another
    ``Suppress`` when callers pass an already-suppressed expression.
    """
    return expr if isinstance(expr, Suppress) else Suppress(expr)

24 

25 

26# 

27# global helpers 

28# 

def counted_array(
    expr: ParserElement, int_expr: typing.Optional[ParserElement] = None, **kwargs
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Examples:

    .. doctest::

        >>> counted_array(Word(alphas)).parse_string('2 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - In this parser, the leading integer value is given in binary,
      '10' indicating that 2 values are in the array:

    .. doctest::

        >>> binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        >>> counted_array(Word(alphas), int_expr=binary_constant
        ...     ).parse_string('10 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - If other fields must be parsed after the count but before the
      list items, give the fields results names and they will
      be preserved in the returned ParseResults:

    .. doctest::

        >>> ppc = pyparsing.common
        >>> count_with_metadata = ppc.integer + Word(alphas)("type")
        >>> typed_array = counted_array(Word(alphanums),
        ...     int_expr=count_with_metadata)("items")
        >>> result = typed_array.parse_string("3 bool True True False")
        >>> print(result.dump())
        ['True', 'True', 'False']
        - items: ['True', 'True', 'False']
        - type: 'bool'
    """
    # accept the pre-PEP8 "intExpr" keyword (deprecated); default None so we
    # can tell "not given" apart from an explicit expression
    intExpr: typing.Optional[ParserElement] = deprecate_argument(
        kwargs, "intExpr", None
    )

    # pre-PEP8 keyword wins only if actually supplied; otherwise use int_expr
    intExpr = intExpr or int_expr
    # Forward placeholder: the actual repetition count is only known once the
    # leading integer has been parsed, at which point the parse action below
    # rebinds this Forward to exactly n copies of expr
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        nonlocal array_expr
        n = t[0]
        # n copies of expr, or Empty for a zero-length array
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count field: a decimal integer
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated by the
        # set_name/add_parse_action calls below
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True so the Forward is rebound even during lookaheads
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")

99 

100 

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Define an expression that matches a literal repeat of whatever a
    previous expression matched — that is, it looks for a 'repeat' of a
    previously parsed expression. For example::

    .. testcode::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because the repeat is
    matched literally, this will also match the leading ``"1:1"`` in
    ``"1:10"``; use :class:`match_previous_expr` if that is not desired.
    Do *not* use with packrat parsing enabled.
    """
    repeater = Forward()

    def record_matched_tokens(s, l, t):
        # rebind the Forward to literally re-match what expr just matched
        if not t:
            repeater << Empty()
        elif len(t) == 1:
            repeater << t[0]
        else:
            # multiple (possibly nested) tokens: flatten, then require each
            # one in sequence
            flattened = _flatten(t.as_list())
            repeater << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(record_matched_tokens, call_during_try=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater

136 

137 

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # re-parse with a copy of the same expression, then verify the tokens
    # match what the first occurrence produced
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # FIX: insert missing space between "found" and the token list
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        # call_during_try=True so the check also runs inside lookaheads
        rep.set_parse_action(must_match_these_tokens, call_during_try=True)

    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep

174 

175 

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    **kwargs,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    :param strs: a string of space-delimited literals, or a collection of
       string literals
    :param caseless: treat all literals as caseless
    :param use_regex: bool - as an optimization, will
       generate a :class:`Regex` object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or
       ``as_keyword=True``, or if creating a :class:`Regex` raises an exception)
    :param as_keyword: bool - enforce :class:`Keyword`-style matching on the
       generated expressions

    Parameters ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
    compatibility, but will be removed in a future release.

    Example:

    .. testcode::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints:

    .. testoutput::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # deprecated pre-PEP8 keyword forms
    useRegex: bool = deprecate_argument(kwargs, "useRegex", True)
    asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)

    # merge deprecated and current flags (either spelling may enable them)
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a str passed as the second positional arg usually means the caller wrote
    # one_of("a", "b") instead of one_of("a b") or one_of(["a", "b"])
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # comparison helpers used by the reordering pass below:
    # is_equal - detects duplicate symbols; masks - detects a shorter symbol
    # that is a prefix of (and would shadow) a longer one
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        # no alternatives at all - return a parser that never matches
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    # in-place scan: drop exact duplicates, and hoist any longer symbol that
    # a shorter one at position i would otherwise shadow
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            # no change made on this pass - advance to next symbol
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags)
            ret.set_name(" | ".join(repr(s) for s in symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst fallback below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst of Token class corresponding to caseless
    # and asKeyword settings
    CASELESS = KEYWORD = True
    parse_element_class = {
        (CASELESS, KEYWORD): CaselessKeyword,
        (CASELESS, not KEYWORD): CaselessLiteral,
        (not CASELESS, KEYWORD): Keyword,
        (not CASELESS, not KEYWORD): Literal,
    }[(caseless, asKeyword)]
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

311 

312 

def dict_of(key: ParserElement, value: ParserElement) -> Dict:
    """Build a :class:`Dict` parser from separate key and value expressions.

    Equivalent to ``Dict(OneOrMore(Group(key + value)))`` - each match of
    ``key + value`` becomes one named entry in the results, with the
    :class:`Dict`, :class:`ZeroOrMore`, and :class:`Group` wrapping handled
    in the proper order. The key pattern may include delimiting markers or
    punctuation, as long as they are suppressed, leaving only the
    significant key text. The value pattern may define its own results
    names, which are carried through into the :class:`Dict` results.

    Example:

    .. doctest::

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label
        ...     ).set_parse_action(' '.join)
        >>> result = dict_of(label, attr_value).parse_string(text)
        >>> print(result['shape'])
        SQUARE
        >>> print(result.shape)  # object attribute access works too
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}
    """
    entry = Group(key + value)
    return Dict(OneOrMore(entry))

368 

369 

def original_text_for(
    expr: ParserElement, as_string: bool = True, **kwargs
) -> ParserElement:
    """Return a parser that yields the original, untokenized text matched
    by ``expr``. Useful to restore the parsed fields of an HTML start tag
    into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text.

    With the default ``as_string=True``, the result is the matched slice
    of the input as a plain string. With ``as_string=False``, the result
    is a :class:`ParseResults` holding that string as a single token,
    while preserving any results names ``expr`` defined - use this form
    when those names must survive.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example:

    .. testcode::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints:

    .. testoutput::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    asString: bool = deprecate_argument(kwargs, "asString", True)

    # either spelling may turn off string-mode
    return_plain_string = asString and as_string

    # zero-width markers that record the match's start/end offsets
    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    wrapped = start_marker("_original_start") + expr + end_marker("_original_end")

    if return_plain_string:

        def restore_original(s, l, t):
            return s[t._original_start : t._original_end]

    else:

        def restore_original(s, l, t):
            # replace token list with the raw slice; pop the marker names
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    wrapped.set_parse_action(restore_original)
    wrapped.ignoreExprs = expr.ignoreExprs
    wrapped.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return wrapped

427 

428 

def ungroup(expr: ParserElement) -> ParserElement:
    """Strip one level of pyparsing's default grouping from the results
    of ``expr``, even if all but one of the grouped expressions are
    non-empty.
    """

    def take_first(t):
        return t[0]

    return TokenConverter(expr).add_parse_action(take_first)

434 

435 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    .. deprecated:: 3.0.0
        Use the :class:`Located` class instead. Note that `Located`
        returns results with one less grouping level.

    Decorate a returned token with its starting and ending locations in
    the input string, adding these results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :meth:`ParserElement.parse_with_tabs`
    """
    warnings.warn(
        f"{'locatedExpr'!r} deprecated - use {'Located'!r}",
        DeprecationWarning,
        stacklevel=2,
    )

    # zero-width probe that records the current parse location
    loc_probe = Empty().set_parse_action(lambda ss, ll, tt: ll)
    end_probe = loc_probe.copy().leave_whitespace()
    return Group(loc_probe("locn_start") + expr("value") + end_probe("locn_end"))

466 

467 

# define special default value to permit None as a significant value for
# ignore_expr: this sentinel means "argument not supplied at all", so
# nested_expr() can distinguish an explicit ignore_expr=None ("ignore
# nothing") from the default (quoted_string)
_NO_IGNORE_EXPR_GIVEN: ParserElement = NoMatch()

471 

472 

def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
    **kwargs,
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    :param opener: str - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression

    :param closer: str - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression

    :param content: expression for items within the nested lists

    :param ignore_expr: expression for ignoring opening and closing delimiters
       (default = :class:`quoted_string`)

    Parameter ``ignoreExpr`` is retained for compatibility
    but will be removed in a future release.

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example:

    .. testcode::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print(f"{func.name} ({func.type}) args: {func.args}")


    prints:

    .. testoutput::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # accept the deprecated pre-PEP8 "ignoreExpr" keyword; the sentinel
    # lets us tell "not supplied" apart from an explicit None
    ignoreExpr: ParserElement = deprecate_argument(
        kwargs, "ignoreExpr", _NO_IGNORE_EXPR_GIVEN
    )

    # if the two spellings disagree, the new-style ignore_expr wins unless
    # the deprecated keyword was explicitly supplied
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr  # type: ignore [assignment]

    # neither argument given: default to ignoring quoted strings
    if ignoreExpr is _NO_IGNORE_EXPR_GIVEN:
        ignoreExpr = quoted_string()

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # build a default content expression: runs of characters that are not
        # delimiters or whitespace
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters can be excluded directly via CharsNotIn
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    )
                else:
                    content = Combine(
                        Empty()
                        + CharsNotIn(
                            opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                        )
                    )
            else:
                # multi-char delimiters need negative lookaheads per character
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )

    # for these internally-created context expressions, simulate whitespace-skipping
    if ParserElement.DEFAULT_WHITE_CHARS:
        content.set_parse_action(
            lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS)
        )

    # recursive definition: a nested expression contains ignored text, further
    # nested expressions, or plain content, between the delimiters
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            _suppression(opener)
            + ZeroOrMore(ignoreExpr | ret | content)
            + _suppression(closer)
        )
    else:
        ret <<= Group(
            _suppression(opener) + ZeroOrMore(ret | content) + _suppression(closer)
        )

    ret.set_name(f"nested {opener}{closer} expression")

    # don't override error message from content expressions
    ret.errmsg = None
    return ret

633 

634 

def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name (str) or a ParserElement matching tag names.
    Returns an (openTag, closeTag) pair; xml=True builds strict XML
    matching (case-sensitive, double-quoted attribute values only)."""
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tags match caselessly; XML tags are case-sensitive
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # a ParserElement was passed in; use its name for results naming
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted; quotes are stripped
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing tag; parse action converts the
            # "empty" result to a bool
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: quoted or unquoted attribute values; attribute names lowercased
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        # HTML allows valueless attributes, so "=value" is optional
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    # adjacent=False permits whitespace inside the closing tag
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    openTag.tag = resname
    closeTag.tag = resname
    # convenience expression matching everything up to the closing tag
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

692 

693 

def make_html_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Construct a pair of (opening, closing) tag expressions for HTML,
    given a tag name. Tags match in either upper or lower case, and
    attributes may use namespaces and quoted or unquoted values.

    Example:

    .. testcode::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints:

    .. testoutput::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    xml_mode = False
    return _makeTags(tag_str, xml_mode)

723 

724 

def make_xml_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Construct a pair of (opening, closing) tag expressions for XML,
    given a tag name. Tags match only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    xml_mode = True
    return _makeTags(tag_str, xml_mode)

734 

735 

# expressions matching any HTML open/close tag (tag name = alphanumerics,
# "_" and ":")
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# HTML5 entity names (trailing ";" stripped) mapped to their replacement text
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# short alternation of frequently-used entities, tried before the full map
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# pattern is built lazily (Regex accepts a callable here) so the compressed
# full-entity alternation is only computed if/when this expression is used
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")

749 

750 

def replace_html_entity(s, l, t):
    """Parse action that converts a matched HTML entity to its replacement
    character (returns None if the entity name is unknown)."""
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

754 

755 

class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    LEFT = 1  # left-associative: a op b op c groups as (a op b) op c
    RIGHT = 2  # right-associative: a op b op c groups as a op (b op c)

762 

763 

# operator expression for one precedence level: a single expression/str, or a
# 2-tuple of expressions for ternary operators
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one entry of infix_notation's op_list: (op_expr, num_operands, associativity)
# with an optional trailing parse action
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]

780 

781 

def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> Forward:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    :param base_expr: expression representing the most basic operand to
       be used in the expression
    :param op_list: list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(op_expr,
       num_operands, right_left_assoc, (optional)parse_action)``, where:

       - ``op_expr`` is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if ``num_operands``
         is 3, ``op_expr`` is a tuple of two expressions, for the two
         operators separating the 3 terms
       - ``num_operands`` is the number of terms for this operator (must be 1,
         2, or 3)
       - ``right_left_assoc`` is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
       - ``parse_action`` is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``set_parse_action(*fn)``
         (:class:`ParserElement.set_parse_action`)

    :param lpar: expression for matching left-parentheses; if passed as a
       str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
       an expression (such as ``Literal('(')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses; if passed as a
       str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
       an expression (such as ``Literal(')')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example:

    .. testcode::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    # convert str parens to suppressed literals
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}_expression")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # build one level of the precedence hierarchy per op_list entry, from
    # tightest-binding (first) to loosest (last); each level falls back to
    # the previous level's expression (lastExpr)
    for operDef in op_list:
        # pad the tuple so a missing parse action becomes None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # unary left-assoc: operand followed by one or more operators
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    # no operator expression: operands simply juxtaposed
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    # right-assoc recursion: right operand re-enters this level
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret

985 

986 

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    .. deprecated:: 3.0.0
        Use the :class:`IndentedBlock` class instead. Note that `IndentedBlock`
        has a different method signature.

    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    :param blockStatementExpr: expression defining syntax of statement that
        is repeated within the indented block

    :param indentStack: list created by caller to manage indentation stack
        (multiple ``statementWithIndentedBlock`` expressions within a single
        grammar should share a common ``indentStack``)

    :param indent: boolean indicating whether block must be indented beyond
        the current level; set to ``False`` for block of left-most statements

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example:

    .. testcode::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints:

    .. testoutput::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    warnings.warn(
        f"{'indentedBlock'!r} deprecated - use {'IndentedBlock'!r}",
        DeprecationWarning,
        stacklevel=2,
    )

    # NOTE(review): the mutable default ``backup_stacks=[]`` is shared across
    # all calls; it is used here as call-spanning storage for snapshots of the
    # caller's indentStack, so the stack can be restored on parse failure.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore the caller's indent stack to the most recent snapshot.
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # At end of input there is nothing left to indent-check.
        if l >= len(s):
            return
        curCol = col(l, s)
        # A peer statement must sit at exactly the current indent level.
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # A sub-block must start strictly deeper than the enclosing level;
        # record the new level on the stack.
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # At end of input there is nothing left to unindent-check.
        if l >= len(s):
            return
        curCol = col(l, s)
        # An unindent must return to some previously-seen indent level.
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # Newlines are significant here, so only tab/space are skippable whitespace.
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # Indented block: INDENT pushes a new level, UNDENT pops back out.
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # Left-most block: statements stay at the current level; UNDENT is
        # optional since there may be no enclosing level to pop.
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # Allow statements to continue across backslash-escaped line ends.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

1139 

1140 

# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
# The inner alternation ``[^*]|\*(?!/)`` matches any character that does not
# close the comment: either a non-'*', or a '*' not followed by '/'.
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

# ``[\s\S]`` matches any character including newlines; the lazy ``*?`` stops
# at the first ``-->``.
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Everything up to (but not including) the next newline; leave_whitespace()
# keeps leading spaces/tabs in the match instead of skipping them.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# ``\\\n`` lets a // comment continue onto the next line after a trailing
# backslash.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# Union of the ``/* ... */`` and ``// ...`` patterns above, as one regex.
cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

1170 

1171 

# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """
    .. deprecated:: 3.1.0
        Use the :class:`DelimitedList` class instead.
    """
    # Thin forwarding shim, retained only for backward compatibility; all
    # arguments are handed through to DelimitedList unchanged.
    list_expr = DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )
    return list_expr

1189 

1190 

# Compatibility synonyms
# Pre-PEP8 camelCase names kept so code written against pyparsing 2.x still
# runs. The plain assignments are direct aliases; the ``replaced_by_pep8(...)``
# entries wrap the replacement callables (presumably to flag the old spelling
# as deprecated on use - see ``replaced_by_pep8`` in .util to confirm).
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
# NOTE(review): the next line rebinds the delimited_list function defined
# earlier in this module, routing both spellings through DelimitedList.
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on