# Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/helpers.py: 27% of 333 statements
# helpers.py
import html.entities
import operator
import re
import sys
import typing

from . import __diag__
from .core import *
from .util import (
    _bslash,
    _flatten,
    _escape_regex_range_chars,
    make_compressed_re,
    replaced_by_pep8,
)

#
# global helpers
#
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as a list of ``expr`` tokens; the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Examples:

    .. doctest::

        >>> counted_array(Word(alphas)).parse_string('2 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - In this parser, the leading integer value is given in binary;
      '10' indicates that 2 values are in the array:

    .. doctest::

        >>> binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        >>> counted_array(Word(alphas), int_expr=binary_constant
        ...     ).parse_string('10 ab cd ef')
        ParseResults(['ab', 'cd'], {})

    - If other fields must be parsed after the count but before the
      list items, give the fields results names and they will
      be preserved in the returned ParseResults:

    .. doctest::

        >>> ppc = pyparsing.common
        >>> count_with_metadata = ppc.integer + Word(alphas)("type")
        >>> typed_array = counted_array(Word(alphanums),
        ...     int_expr=count_with_metadata)("items")
        >>> result = typed_array.parse_string("3 bool True True False")
        >>> print(result.dump())
        ['True', 'True', 'False']
        - items: ['True', 'True', 'False']
        - type: 'bool'
    """
    intExpr = intExpr or int_expr
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")


def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, it will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        if not t:
            rep << Empty()
            return

        if len(t) == 1:
            rep << t[0]
            return

        # flatten t tokens
        tflat = _flatten(t.as_list())
        rep << And(Literal(tt) for tt in tflat)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep


def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example:

    .. testcode::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, it will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep

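# Illustrative contrast between the two helpers above (a sketch, not part
# of the library itself):
#
#     first = Word(nums)
#     literal_match = first + ":" + match_previous_literal(first)
#     expr_match = first + ":" + match_previous_expr(first)
#     literal_match.parse_string("1:10")   # succeeds on the leading "1:1"
#     expr_match.parse_string("1:10")      # raises ParseException
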

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s.
    This helper performs longest-first testing when there is a conflict,
    regardless of the input order, but returns a :class:`MatchFirst` for
    best performance.

    :param strs: a string of space-delimited literals, or a collection of
       string literals
    :param caseless: treat all literals as caseless
    :param use_regex: bool - as an optimization, will
       generate a :class:`Regex` object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or
       ``as_keyword=True``, or if creating a :class:`Regex` raises an exception)
    :param as_keyword: bool - enforce :class:`Keyword`-style matching on the
       generated expressions

    Parameters ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
    compatibility, but will be removed in a future release.

    Example:

    .. testcode::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints:

    .. testoutput::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags)
            ret.set_name(" | ".join(repr(s) for s in symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst of Token class corresponding to caseless
    # and asKeyword settings
    CASELESS = KEYWORD = True
    parse_element_class = {
        (CASELESS, KEYWORD): CaselessKeyword,
        (CASELESS, not KEYWORD): CaselessLiteral,
        (not CASELESS, KEYWORD): Keyword,
        (not CASELESS, not KEYWORD): Literal,
    }[(caseless, asKeyword)]
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

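# Example (an illustrative sketch of the longest-first reordering):
#
#     print(one_of("> >= <").parse_string(">="))
#     # -> ['>='] ; without the reordering, a MatchFirst built in the
#     # given order would match only '>'
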

def dict_of(key: ParserElement, value: ParserElement) -> Dict:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example:

    .. doctest::

        >>> text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        >>> data_word = Word(alphas)
        >>> label = data_word + FollowedBy(':')
        >>> attr_expr = (
        ...     label
        ...     + Suppress(':')
        ...     + OneOrMore(data_word, stop_on=label)
        ...     .set_parse_action(' '.join))
        >>> print(attr_expr[1, ...].parse_string(text).dump())
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        >>> attr_label = label
        >>> attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label
        ...     ).set_parse_action(' '.join)

        >>> # similar to Dict, but simpler call format
        >>> result = dict_of(attr_label, attr_value).parse_string(text)
        >>> print(result.dump())
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        [0]:
          ['shape', 'SQUARE']
        [1]:
          ['posn', 'upper left']
        [2]:
          ['color', 'light blue']
        [3]:
          ['texture', 'burlap']

        >>> print(result['shape'])
        SQUARE
        >>> print(result.shape)  # object attribute access works too
        SQUARE
        >>> print(result.as_dict())
        {'shape': 'SQUARE', 'posn': 'upper left', 'color': 'light blue', 'texture': 'burlap'}
    """
    return Dict(OneOrMore(Group(key + value)))


def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example:

    .. testcode::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints:

    .. testoutput::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    asString = asString and as_string

    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr

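# Example (an illustrative sketch): with as_string=False, results names
# defined inside the wrapped expression are preserved on the returned
# ParseResults:
#
#     b, b_end = make_html_tags("b")
#     patt = original_text_for(b + SkipTo(b_end)("body") + b_end,
#                              as_string=False)
#     r = patt.parse_string("<b>text</b>")
#     print(r[0], "|", r.body)   # -> <b>text</b> | text
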

def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
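
    Example (an illustrative sketch; output shown via ``print``, which
    renders ParseResults as nested lists):

    .. doctest::

        >>> grouped = Group(Word(alphas) + Word(nums))
        >>> print(grouped.parse_string("abc 123"))
        [['abc', '123']]
        >>> print(ungroup(grouped).parse_string("abc 123"))
        ['abc', '123']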

421 """ 

422 return TokenConverter(expr).add_parse_action(lambda t: t[0]) 

423 

424 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    .. deprecated:: 3.0.0
       Use the :class:`Located` class instead.

    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters; you
    may want to call :meth:`ParserElement.parse_with_tabs`.

    Example:

    .. testcode::

        wd = Word(alphas)
        res = locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222")
        for match in res:
            print(match)

    prints:

    .. testoutput::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
    return Group(
        locator("locn_start")
        + expr("value")
        + locator.copy().leaveWhitespace()("locn_end")
    )


# define special default value to permit None as a significant value for
# ignore_expr
_NO_IGNORE_EXPR_GIVEN = NoMatch()


def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
    *,
    ignoreExpr: typing.Optional[ParserElement] = _NO_IGNORE_EXPR_GIVEN,
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    :param opener: str - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression

    :param closer: str - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression

    :param content: expression for items within the nested lists

    :param ignore_expr: expression for ignoring opening and closing delimiters
       (default= :class:`quoted_string`)

    Parameter ``ignoreExpr`` is retained for compatibility
    but will be removed in a future release.

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example:

    .. testcode::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print(f"{func.name} ({func.type}) args: {func.args}")

    prints:

    .. testoutput::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr

    if ignoreExpr is _NO_IGNORE_EXPR_GIVEN:
        ignoreExpr = quoted_string()

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    )
                else:
                    content = Combine(
                        Empty()
                        + CharsNotIn(
                            opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                        )
                    )
            else:
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    )
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )

        # for these internally-created context expressions, simulate whitespace-skipping
        if ParserElement.DEFAULT_WHITE_CHARS:
            content.set_parse_action(
                lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS)
            )

    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))

    ret.set_name(f"nested {opener}{closer} expression")

    # don't override error message from content expressions
    ret.errmsg = None
    return ret


def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name"""
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag


def make_html_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example:

    .. testcode::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints:

    .. testoutput::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    return _makeTags(tag_str, False)


def make_xml_tags(
    tag_str: Union[str, ParserElement],
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    return _makeTags(tag_str, True)

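# Example (an illustrative sketch, mirroring the make_html_tags example
# but case-sensitive):
#
#     name, name_end = make_xml_tags("name")
#     value = name + SkipTo(name_end)("value") + name_end
#     print(value.parse_string("<name>Alice</name>").value)   # -> Alice
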

any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")


def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    return _htmlEntityMap.get(t.entity)

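# Example (an illustrative sketch): attach replace_html_entity to a copy
# of common_html_entity to decode entities during a transform:
#
#     decoder = common_html_entity.copy().set_parse_action(replace_html_entity)
#     print(decoder.transform_string("100 &lt; 200 &amp; x"))
#     # -> 100 < 200 & x
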

class OpAssoc(Enum):
    """Enumeration of operator associativity, used in constructing
    InfixNotationOperatorSpec for :class:`infix_notation`"""

    LEFT = 1
    RIGHT = 2


InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]


def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> Forward:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    :param base_expr: expression representing the most basic operand to
       be used in the expression
    :param op_list: list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(op_expr,
       num_operands, right_left_assoc, (optional)parse_action)``, where:

       - ``op_expr`` is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if ``num_operands``
         is 3, ``op_expr`` is a tuple of two expressions, for the two
         operators separating the 3 terms
       - ``num_operands`` is the number of terms for this operator (must be 1,
         2, or 3)
       - ``right_left_assoc`` is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
       - ``parse_action`` is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``set_parse_action(*fn)``
         (:class:`ParserElement.set_parse_action`)

    :param lpar: expression for matching left-parentheses; if passed as a
       str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
       an expression (such as ``Literal('(')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses; if passed as a
       str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
       an expression (such as ``Literal(')')``), then it will be kept in
       the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example:

    .. testcode::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
                ('-', 1, OpAssoc.RIGHT),
                (one_of('* /'), 2, OpAssoc.LEFT),
                (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints:

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}_expression")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    arity: int
    rightLeftAssoc: opAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    for operDef in op_list:
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret

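# Example (an illustrative sketch): passing lpar/rpar as non-Suppress
# expressions keeps the parentheses in the results, grouped with the
# enclosed expression (pyparsing_common is assumed imported in user code):
#
#     expr = infix_notation(pyparsing_common.integer,
#                           [(one_of("+ -"), 2, OpAssoc.LEFT)],
#                           lpar=Literal("("), rpar=Literal(")"))
#     print(expr.parse_string("(1+2)+3"))
#     # -> [[['(', [1, '+', 2], ')'], '+', 3]]
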

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    .. deprecated:: 3.0.0
       Use the :class:`IndentedBlock` class instead.

    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    :param blockStatementExpr: expression defining syntax of statement that
       is repeated within the indented block

    :param indentStack: list created by caller to manage indentation stack
       (multiple ``statementWithIndentedBlock`` expressions within a single
       grammar should share a common ``indentStack``)

    :param indent: boolean indicating whether block must be indented beyond
       the current level; set to ``False`` for block of left-most statements

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example:

    .. testcode::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints:

    .. testoutput::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    backup_stacks.append(indentStack[:])

    def reset_stack():
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")


# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"

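# Example (an illustrative sketch): comment expressions are typically
# passed to ParserElement.ignore() so they are skipped during parsing:
#
#     expr = Word(alphas)[1, ...]
#     expr.ignore(cpp_style_comment)
#     print(expr.parse_string("foo /* skip me */ bar // trailing"))
#     # -> ['foo', 'bar']
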

# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]


# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """
    .. deprecated:: 3.1.0
       Use the :class:`DelimitedList` class instead.
    """
    return DelimitedList(
        expr, delim, combine, min, max, allow_trailing_delim=allow_trailing_delim
    )


# Compatibility synonyms
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on