Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/helpers.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

344 statements  

1# helpers.py 

2import html.entities 

3import operator 

4import re 

5import sys 

6import typing 

7 

8from . import __diag__ 

9from .core import * 

10from .util import ( 

11 _bslash, 

12 _flatten, 

13 _escape_regex_range_chars, 

14 make_compressed_re, 

15 replaced_by_pep8, 

16) 

17 

def _suppression(expr: Union[ParserElement, str]) -> ParserElement:
    """Return *expr* wrapped in a :class:`Suppress`, unless it already is one.

    Internal helper; avoids nesting a Suppress inside another Suppress.
    """
    # an existing Suppress is passed through untouched
    return expr if isinstance(expr, Suppress) else Suppress(expr)

23 

24# 

25# global helpers 

26# 

def counted_array(
    expr: ParserElement, int_expr: typing.Optional[ParserElement] = None, **kwargs
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Examples:

    .. doctest::

        >>> counted_array(Word(alphas)).parse_string('2 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - In this parser, the leading integer value is given in binary,
      '10' indicating that 2 values are in the array:

      .. doctest::

        >>> binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        >>> counted_array(Word(alphas), int_expr=binary_constant
        ...     ).parse_string('10 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - If other fields must be parsed after the count but before the
      list items, give the fields results names and they will
      be preserved in the returned ParseResults:

      .. doctest::

        >>> ppc = pyparsing.common
        >>> count_with_metadata = ppc.integer + Word(alphas)("type")
        >>> typed_array = counted_array(Word(alphanums),
        ...     int_expr=count_with_metadata)("items")
        >>> result = typed_array.parse_string("3 bool True True False")
        >>> print(result.dump())
        ['True', 'True', 'False']
        - items: ['True', 'True', 'False']
        - type: 'bool'
    """
    # pre-PEP8 keyword argument `intExpr` is still accepted via kwargs
    intExpr: typing.Optional[ParserElement] = deprecate_argument(
        kwargs, "intExpr", None
    )

    intExpr = intExpr or int_expr
    # placeholder for the list body; redefined each time a count is parsed
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # once the count is known, bind the array body to exactly `n`
        # repetitions of expr (or Empty for a zero count)
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count expression: a decimal integer
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated by the added action
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True so lookahead/packrat attempts also rebind the array
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")

97 

98 

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

    .. testcode::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    # forward-declared repeat expression; re-bound every time `expr` matches
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        # redefine `rep` to match the literal text that `expr` just matched
        if not t:
            # nothing matched: the repeat matches the empty string
            rep << Empty()
            return

        if len(t) == 1:
            # single token: repeat it as a plain literal
            rep << t[0]
            return

        # flatten t tokens
        tflat = _flatten(t.as_list())
        rep << And(Literal(tt) for tt in tflat)

    # call_during_try=True so lookahead attempts also update the repeater
    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep

134 

135 

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    # forward-declared repeat expression, initially a copy of `expr`;
    # its parse action is (re)installed each time `expr` matches
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # remember the tokens matched by the most recent occurrence of `expr`
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # fail unless the repeat matched the same flattened tokens
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fix: original message was missing the space after "found"
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, call_during_try=True)

    expr.add_parse_action(copy_token_to_repeater, call_during_try=True)
    rep.set_name("(prev) " + str(expr))
    return rep

172 

173 

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    **kwargs,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    :param strs: a string of space-delimited literals, or a collection of
       string literals
    :param caseless: treat all literals as caseless
    :param use_regex: bool - as an optimization, will
       generate a :class:`Regex` object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or
       ``as_keyword=True``, or if creating a :class:`Regex` raises an exception)
    :param as_keyword: bool - enforce :class:`Keyword`-style matching on the
       generated expressions

    Parameters ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
    compatibility, but will be removed in a future release.

    Example:

    .. testcode::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints:

    .. testoutput::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # pre-PEP8 keyword arguments, merged with the PEP8 parameters below
    useRegex: bool = deprecate_argument(kwargs, "useRegex", True)
    asKeyword: bool = deprecate_argument(kwargs, "asKeyword", False)

    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a str passed for `caseless` suggests the caller passed several string
    # choices positionally instead of one space-delimited string
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # is_equal: symbols are duplicates; masks: first symbol is a prefix of
    # (and would therefore shadow) the second during first-match testing
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                # drop exact duplicate, re-examine position i
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                # move the longer, masked symbol ahead of its prefix
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            # no change made at position i; advance
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags)
            ret.set_name(" | ".join(repr(s) for s in symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst of Token class corresponding to caseless
    # and asKeyword settings
    CASELESS = KEYWORD = True
    parse_element_class = {
        (CASELESS, KEYWORD): CaselessKeyword,
        (CASELESS, not KEYWORD): CaselessLiteral,
        (not CASELESS, KEYWORD): Keyword,
        (not CASELESS, not KEYWORD): Literal,
    }[(caseless, asKeyword)]
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

309 

310 

def dict_of(key: ParserElement, value: ParserElement) -> Dict:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example:

    .. doctest::

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        >>> data_word = Word(alphas)
        >>> attr_label = data_word + FollowedBy(':')
        >>> attr_value = Suppress(':') + OneOrMore(data_word,
        ...     stop_on=attr_label).set_parse_action(' '.join)
        >>> result = dict_of(attr_label, attr_value).parse_string(text)
        >>> print(result['shape'])
        SQUARE
        >>> print(result.shape)  # object attribute access works too
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}
    """
    # each key/value pair is grouped so Dict can treat the first token of
    # each group as the results name for the rest
    entry = Group(key + value)
    return Dict(OneOrMore(entry))

366 

367 

def original_text_for(
    expr: ParserElement, as_string: bool = True, **kwargs
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example:

    .. testcode::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints:

    .. testoutput::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # pre-PEP8 keyword argument, combined with the PEP8 parameter
    asString: bool = deprecate_argument(kwargs, "asString", True)

    asString = asString and as_string

    # zero-width markers whose parse action records the current location
    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    # don't skip leading whitespace before recording the end location
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        # return the raw input slice between the two recorded locations
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            # replace token list with the original slice; pop() removes the
            # internal marker names but preserves caller-defined names
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    # propagate the wrapped expression's ignorables (comments, etc.)
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr

425 

426 

def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """

    def _unwrap(t):
        # replace the single-element group with its contents
        return t[0]

    return TokenConverter(expr).add_parse_action(_unwrap)

432 

433 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    .. deprecated:: 3.0.0
       Use the :class:`Located` class instead. Note that `Located`
       returns results with one less grouping level.

    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :meth:`ParserElement.parse_with_tabs`
    """
    warnings.warn(
        f"{'locatedExpr'!r} deprecated - use {'Located'!r}",
        DeprecationWarning,
        stacklevel=2,
    )

    # zero-width expression whose parse action yields the current location
    locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
    return Group(
        locator("locn_start")
        + expr("value")
        # leave_whitespace so the end location is taken immediately after the
        # matched text, not after any trailing whitespace
        + locator.copy().leave_whitespace()("locn_end")
    )

464 

465 

# define special default value to permit None as a significant value for
# ignore_expr
# (sentinel: identity-compared in nested_expr to detect "argument not given")
_NO_IGNORE_EXPR_GIVEN = NoMatch()

469 

470 

def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
    **kwargs,
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    :param opener: str - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression

    :param closer: str - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression

    :param content: expression for items within the nested lists

    :param ignore_expr: expression for ignoring opening and closing delimiters
       (default = :class:`quoted_string`)

    Parameter ``ignoreExpr`` is retained for compatibility
    but will be removed in a future release.

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example:

    .. testcode::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print(f"{func.name} ({func.type}) args: {func.args}")


    prints:

    .. testoutput::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # pre-PEP8 keyword argument; same sentinel default as ignore_expr so we
    # can detect which of the two the caller actually supplied
    ignoreExpr: ParserElement = deprecate_argument(
        kwargs, "ignoreExpr", _NO_IGNORE_EXPR_GIVEN
    )

    # prefer the PEP8 argument when only one of the two was given
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr  # type: ignore [assignment]

    # neither argument given: default to ignoring quoted strings
    if ignoreExpr is _NO_IGNORE_EXPR_GIVEN:
        ignoreExpr = quoted_string()

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # build a default content expression: runs of characters that are not
        # delimiters or whitespace (requires string opener/closer)
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters can be excluded via CharsNotIn
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    )
                else:
                    content = Combine(
                        Empty()
                        + CharsNotIn(
                            opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                        )
                    )
            else:
                # multi-char delimiters need explicit negative lookaheads
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
    else:
        raise ValueError(
            "opening and closing arguments must be strings if no content expression is given"
        )

    # for these internally-created context expressions, simulate whitespace-skipping
    if ParserElement.DEFAULT_WHITE_CHARS:
        content.set_parse_action(
            lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS)
        )

    # recursive definition: a group is opener + (ignorables|nested|content)* + closer
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            _suppression(opener) + ZeroOrMore(ignoreExpr | ret | content) + _suppression(closer)
        )
    else:
        ret <<= Group(_suppression(opener) + ZeroOrMore(ret | content) + _suppression(closer))

    ret.set_name(f"nested {opener}{closer} expression")

    # don't override error message from content expressions
    ret.errmsg = None
    return ret

627 

628 

def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name.

    ``xml=True`` requires exact-case tag names and double-quoted attribute
    values; ``xml=False`` (HTML) matches caselessly and also accepts
    unquoted attribute values. Returns an ``(openTag, closeTag)`` pair.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tags match caselessly; XML tags must match exactly
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # caller passed an expression; use its name for results naming
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing tag; stored as bool "empty"
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: quoted or unquoted attribute values are accepted
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are normalized to lowercase;
                        # valueless attributes are allowed
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    # expose the tag name and a convenience body-matching expression
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

686 

687 

def make_html_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example:

    .. testcode::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints:

    .. testoutput::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects HTML rules: caseless tags, unquoted values allowed
    return _makeTags(tag_str, xml=False)

717 

718 

def make_xml_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # xml=True selects XML rules: exact-case tags, double-quoted values only
    return _makeTags(tag_str, xml=True)

728 

729 

# generic tag expressions matching any HTML tag name
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (without the trailing ';') to their characters
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# most frequent entities listed first, as regex alternatives
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# pattern is supplied as a callable so the (large) compressed entity regex is
# built lazily, on first use
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")

743 

744 

def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # "entity" is the named group captured by common_html_entity;
    # unknown names map to None
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

748 

749 

class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # a OP b OP c parses as ((a OP b) OP c)
    LEFT = 1
    # a OP b OP c parses as (a OP (b OP c))
    RIGHT = 2

756 

757 

# a single operator for infix_notation: an expression, a string (converted to
# a Literal), or - for ternary operators - a tuple of the two separator exprs
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level: (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]

774 

775 

def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> Forward:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    :param base_expr: expression representing the most basic operand to
       be used in the expression
    :param op_list: list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(op_expr,
       num_operands, right_left_assoc, (optional)parse_action)``, where:

       - ``op_expr`` is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if ``num_operands``
         is 3, ``op_expr`` is a tuple of two expressions, for the two
         operators separating the 3 terms
       - ``num_operands`` is the number of terms for this operator (must be 1,
         2, or 3)
       - ``right_left_assoc`` is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
       - ``parse_action`` is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``set_parse_action(*fn)``
         (:class:`ParserElement.set_parse_action`)

    :param lpar: expression for matching left-parentheses; if passed as a
       str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
       an expression (such as ``Literal('(')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses; if passed as a
       str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
       an expression (such as ``Literal(')')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example:

    .. testcode::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints:

    .. testoutput::
       :options: +NORMALIZE_WHITESPACE


        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}_expression")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    # loop variables; rightLeftAssoc annotation corrected to the PEP8 enum name
    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # build precedence levels bottom-up: each level's expression falls back
    # to the previous (tighter-binding) level
    for operDef in op_list:
        # pad the spec tuple so the optional parse action defaults to None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret

979 

980 

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    .. deprecated:: 3.0.0
        Use the :class:`IndentedBlock` class instead. Note that `IndentedBlock`
        has a different method signature.

    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    :param blockStatementExpr: expression defining syntax of statement that
        is repeated within the indented block

    :param indentStack: list created by caller to manage indentation stack
        (multiple ``statementWithIndentedBlock`` expressions within a single
        grammar should share a common ``indentStack``)

    :param indent: boolean indicating whether block must be indented beyond
        the current level; set to ``False`` for block of left-most statements

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example:

    .. testcode::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints:

    .. testoutput::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    warnings.warn(
        f"{'indentedBlock'!r} deprecated - use {'IndentedBlock'!r}",
        DeprecationWarning,
        stacklevel=2,
    )

    # NOTE: the mutable default for ``backup_stacks`` is deliberate here - it
    # acts as module-lifetime shared storage so that a failed parse can restore
    # the caller's indentStack (see reset_stack below); do not "fix" it.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore the caller-visible indentStack to its most recent snapshot
        # (invoked via the fail action when the block fails to parse).
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # Parse action: succeed only when the current column matches the
        # innermost indentation level exactly.
        if l >= len(s):
            # at end-of-string there is nothing left to indent-check
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # Parse action: require a column strictly deeper than the current
        # level, and push that new level onto the stack.
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # Parse action: require the current column to be one of the previously
        # seen indentation levels; pop a single level if we moved left.
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # NL: one or more line-ends, treating only tab/space as skippable whitespace
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    # The extra leading Empty() forces whitespace to be skipped before the
    # column check runs in checkSubIndent.
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # block must be indented deeper than the enclosing level
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # left-most block: no INDENT required, UNDENT is optional
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    # on parse failure, roll the shared indentStack back to its snapshot
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # allow backslash line-continuations inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

1133 

1134 

# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
# Pattern: '/*', then any run of non-'*' chars or '*' not followed by '/',
# closed by '*/'. Does not support nested comments.
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

# [\s\S] matches any character including newline, so multi-line comments work;
# the non-greedy *? stops at the first '-->'.
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# matches to (but not including) the next newline; leave_whitespace so leading
# spaces on the line are kept in the match
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# '//' to end of line, allowing backslash-newline line continuations
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# union of the c_style_comment and dbl_slash_comment patterns above
cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"

1158 

1159# build list of built-in expressions, for future reference if a global default value 

1160# gets updated 

# build list of built-in expressions, for future reference if a global default value
# gets updated
# Snapshot of every ParserElement defined at module level up to this point,
# taken once at import time.
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

1164 

1165 

1166# compatibility function, superseded by DelimitedList class 

# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """
    Compatibility wrapper that constructs a :class:`DelimitedList` from the
    given arguments, forwarding them unchanged.

    .. deprecated:: 3.1.0
        Use the :class:`DelimitedList` class instead.
    """
    # Build and return the replacement class; all arguments pass straight through.
    result = DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )
    return result

1183 

1184 

# Compatibility synonyms
# Pre-PEP8 camelCase aliases for the snake_case names defined above/imported
# from core. Plain assignments are simple aliases; replaced_by_pep8 wraps the
# target so uses of the old name emit a deprecation warning.
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
# NOTE: this deliberately re-binds (shadows) the delimited_list function defined
# earlier in this module, so callers get the deprecation-warning wrapper.
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on