Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pyparsing/helpers.py: 33%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

326 statements  

1# helpers.py 

2import html.entities 

3import operator 

4import re 

5import sys 

6import typing 

7 

8from . import __diag__ 

9from .core import * 

10from .util import ( 

11 _bslash, 

12 _flatten, 

13 _escape_regex_range_chars, 

14 make_compressed_re, 

15 replaced_by_pep8, 

16) 

17 

18 

19# 

20# global helpers 

21# 

def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    Defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many following ``expr`` terms to
    match. The parsed tokens are the list of ``expr`` matches; the count
    token itself is suppressed (though any results names it carries are
    preserved).

    If ``int_expr`` is given, it must be a pyparsing expression that
    produces an integer value (``intExpr`` is the retained pre-PEP8
    spelling of the same argument).

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # leading count given in binary - '10' means 2 items follow
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # extra fields parsed along with the count keep their results names:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    intExpr = intExpr or int_expr
    contents = Forward()

    def _bind_count(s, l, t):
        nonlocal contents
        count = t[0]
        contents <<= Empty() if not count else expr * count
        # remove the count token itself, but keep any named results on it
        del t[:]

    counter = (
        Word(nums).set_parse_action(lambda t: int(t[0]))
        if intExpr is None
        else intExpr.copy()
    )
    counter.set_name("arrayLen")
    counter.add_parse_action(_bind_count, call_during_try=True)
    return (counter + contents).set_name(f"(len) {expr}...")

80 

81 

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper defining an expression that matches a literal repeat of
    whatever a previous expression matched. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because the comparison is
    against the previously matched *literal text*, this will also match
    the leading ``"1:1"`` in ``"1:10"``; if that is not desired, use
    :class:`match_previous_expr` instead. Do *not* use with packrat
    parsing enabled.
    """
    repeater = Forward()

    def _capture_for_repeat(s, l, t):
        # rebuild the repeater from the tokens just matched
        if not t:
            repeater << Empty()
        elif len(t) == 1:
            repeater << t[0]
        else:
            # nested results: flatten and require each literal in sequence
            flat_tokens = _flatten(t.as_list())
            repeater << And(Literal(tok) for tok in flat_tokens)

    expr.add_parse_action(_capture_for_repeat, callDuringTry=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater

115 

116 

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fixed: message previously read "found[...]" with no space
                # between the word and the token list
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep

151 

152 

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # merge PEP8 and legacy argument spellings
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a string passed for `caseless` almost certainly means the caller passed
    # multiple string args instead of one space-delimited string
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "warn_on_multiple_string_args_to_oneof:"
            " More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # choose comparison/prefix predicates and the element class to build
    # if we fall back to MatchFirst
    if caseless:
        is_equal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parse_element_class = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        is_equal = operator.eq
        masks = lambda a, b: b.startswith(a)
        parse_element_class = Keyword if asKeyword else Literal

    symbols: list[str]
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with
    # shorter ones: drop duplicates, and move any symbol that a shorter symbol
    # would mask (is a prefix of) ahead of it. `i` only advances when no
    # change was made (the for-else), so moved symbols get re-examined.
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1 :]):
            if is_equal(other, cur):
                del symbols[i + j + 1]
                break
            if len(other) > len(cur) and masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parse_element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

277 

278 

def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the patterns for the key and the value. Takes care of combining the
    :class:`Dict`, :class:`OneOrMore`, and :class:`Group` wrappers in the
    proper order. The key pattern may include delimiters or punctuation,
    as long as they are suppressed, leaving only the significant key
    text; the value pattern may carry named results, which become named
    token fields in the :class:`Dict` results.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # each key/value pair becomes one grouped entry; Dict turns the entries
    # into named results keyed by the key token
    entry = Group(key + value)
    return Dict(OneOrMore(entry))

317 

318 

def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text matched by a given
    expression. Useful to restore the parsed fields of an HTML start tag
    into the raw tag text itself, or to revert separate whitespace-split
    tokens back to the original matching input text. By default the
    result is a string containing the original parsed text.

    If ``as_string`` is passed as ``False``, the return value is a
    :class:`ParseResults` containing any results names that were
    originally matched plus a single token holding the original matched
    text - set ``as_string=False`` whenever you need to preserve results
    names from the inner expression.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    asString = asString and as_string

    # bracket the expression with zero-width location markers
    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    wrapped = start_marker("_original_start") + expr + end_marker("_original_end")

    if asString:

        def extract_text(s, l, t):
            # replace all tokens with the raw source slice
            return s[t._original_start : t._original_end]

    else:

        def extract_text(s, l, t):
            # keep named results; pop() removes the bookkeeping markers
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    wrapped.set_parse_action(extract_text)
    wrapped.ignoreExprs = expr.ignoreExprs
    wrapped.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return wrapped

370 

371 

def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    # wrap in a TokenConverter, then strip the enclosing group in a parse action
    unwrapped = TokenConverter(expr)
    unwrapped.add_parse_action(lambda t: t[0])
    return unwrapped

377 

378 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width marker that records the current parse location
    start_locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # end marker must not skip leading whitespace, or it would report a
    # position past the matched text
    end_locator = start_locator.copy().leaveWhitespace()
    return Group(
        start_locator("locn_start") + expr("value") + end_locator("locn_end")
    )

412 

413 

def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile the PEP8 and legacy ignore arguments: if they differ, prefer
    # ignore_expr when ignoreExpr still looks like its default.
    # NOTE(review): both defaults are *fresh* quoted_string() instances, so the
    # `!=`/`==` here rely on ParserElement comparison semantics rather than
    # identity - presumably intentional, but worth confirming against core.
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression from the delimiter strings
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters: content is any run of characters that
                # are not delimiters or whitespace (and not the ignore expr)
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-char delimiters: must test for the whole opener/closer
                # literal at each position, one character at a time
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # recursive grammar: a nested list may contain further nested lists
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name(f"nested {opener}{closer} expression")
    # don't override error message from content expressions
    ret.errmsg = None
    return ret

545 

546 

def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    ``xml=True`` builds strict XML tags (case-sensitive name, double-quoted
    attribute values only); ``xml=False`` builds lenient HTML tags
    (caseless name, quoted or unquoted attribute values, attribute names
    lower-cased). Returns an ``(openTag, closeTag)`` pair.

    The mutable default arguments are intentional: the two Suppress
    elements are built once and shared by every call.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tag names match caselessly; XML tag names are case-sensitive
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted strings
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # trailing "/" marks a self-closing tag; exposed as boolean "empty"
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: values may be quoted or any run of printables up to ">"
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # HTML attribute names are case-insensitive; normalize
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        # valueless attributes (e.g. "disabled") are allowed
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    # expose the tag name and a ready-made body-skipping expression
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

603 

604 

def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Tags match in either upper or lower case, and
    attributes may use namespaces and quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # delegate to the shared builder in lenient (non-XML) mode
    return _makeTags(tag_str, xml=False)

630 

631 

def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Unlike :class:`make_html_tags`, tags match only in
    the exact upper/lower case given.

    Example: similar to :class:`make_html_tags`
    """
    # delegate to the shared builder in strict XML mode
    return _makeTags(tag_str, xml=True)

641 

642 

# ready-made matchers for HTML tags with *any* tag name
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (trailing ";" stripped) to their replacement text
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# most frequently seen entities, listed first in the regex as a fast path
_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace(
    " ", "|"
)
# passing a callable defers building the (large) pattern until first use
common_html_entity = Regex(
    lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});"
).set_name("common HTML entity")

656 

657 

def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # "entity" is the named group captured by common_html_entity
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

661 

662 

class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    LEFT = 1   # operators at this precedence level group left-to-right
    RIGHT = 2  # operators at this precedence level group right-to-left

669 

670 

# the operator expression in an infix_notation spec: a ParserElement, a string
# (converted to a Literal), or - for ternary operators - a pair of either
InfixNotationOperatorArgType = Union[
    ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one entry of infix_notation's op_list:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]

687 

688 

def infix_notation(
    base_expr: ParserElement,
    op_list: list[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    ret.set_name(f"{base_expr.name}_expression")
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}")

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(nested_expr)
    else:
        lastExpr = base_expr | nested_expr

    arity: int
    # NOTE(review): annotation uses lowercase `opAssoc` - presumably the
    # pre-PEP8 compat alias for OpAssoc; harmless since function-body
    # annotations are not evaluated, but confirm the alias exists.
    rightLeftAssoc: opAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    matchExpr: ParserElement
    match_lookahead: ParserElement
    # build one precedence level at a time, from tightest-binding outward;
    # each level's operand is the previous level's full expression
    for operDef in op_list:
        # pad with None so the optional parse action always unpacks
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} operations"
        else:
            term_name = f"{opExpr} operations"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        match_lookahead = And([])
        if rightLeftAssoc is OpAssoc.LEFT:
            # left-assoc: repeat the operator/operand pair after one operand
            if arity == 1:
                match_lookahead = _FB(lastExpr + opExpr)
                matchExpr = Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + lastExpr)
                    matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + lastExpr)
                    matchExpr = Group(lastExpr[2, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                )
                matchExpr = Group(
                    lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]
                )
        elif rightLeftAssoc is OpAssoc.RIGHT:
            # right-assoc: recurse into thisExpr on the right-hand side
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                match_lookahead = _FB(opExpr.expr + thisExpr)
                matchExpr = Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    match_lookahead = _FB(lastExpr + opExpr + thisExpr)
                    matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...])
                else:
                    match_lookahead = _FB(lastExpr + thisExpr)
                    matchExpr = Group(lastExpr + thisExpr[1, ...])
            elif arity == 3:
                match_lookahead = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                )
                matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)

        # suppress lookahead expr from railroad diagrams
        match_lookahead.show_in_diagram = False

        # TODO - determine why this statement can't be included in the following
        # if pa block
        matchExpr = match_lookahead + matchExpr

        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)

        # NOTE(review): `setName` (legacy spelling) where the rest of the
        # file uses `set_name` - synonyms via the pep8 compat layer
        thisExpr <<= (matchExpr | lastExpr).setName(term_name)
        lastExpr = thisExpr

    ret <<= lastExpr
    return ret

884 

885 

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): the mutable default ``backup_stacks=[]`` appears deliberate -
    # it accumulates a snapshot of the caller's indentStack per indentedBlock()
    # call, and the fail/parse actions below pop/restore from it. Do not "fix"
    # this to a None-default without verifying nested-block restore semantics.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore the caller's indentStack to the most recent snapshot;
        # installed as a fail action so a failed parse leaves the stack intact.
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # Parse action: succeed only when the current column matches the top of
        # the indent stack (i.e., this statement is a peer of the previous one).
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # Parse action: require a column strictly deeper than the current
        # level, and push it as the new indentation level.
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # Parse action: require the current column to be some previously seen
        # indentation level; pop one level if we have moved back out.
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # NL consumes line ends but treats only tab/space as skippable whitespace,
    # so column positions remain meaningful for the checks above.
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # Block must be indented deeper than the enclosing level.
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # Left-most statements: no INDENT required, UNDENT is optional.
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # Allow statements to continue across lines via a trailing backslash.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

1027 

1028 

# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
# Matches "/* ... */"; the inner alternation forbids "*/" inside the body
# without backtracking (any non-'*', or a '*' not followed by '/').
c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment")
"Comment of the form ``/* ... */``"

# Non-greedy [\s\S] spans newlines, so multi-line HTML comments match.
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Everything up to (but not including) the next newline; leave_whitespace()
# keeps leading spaces so the full remainder of the line is captured.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# "//" to end of line, allowing backslash-newline line continuations.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# Union of the C-style and double-slash patterns above.
cpp_style_comment = Regex(
    r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)"
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"

1051 

1052 

# build list of built-in expressions, for future reference if a global default value
# gets updated
# NOTE: evaluated at this point in the module, so it captures exactly the
# ParserElement instances defined above (comment expressions, tag helpers, etc.).
_builtin_exprs: list[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

1058 

1059 

# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)

    Thin backward-compatibility shim: forwards every argument unchanged
    to :class:`DelimitedList` and returns the resulting expression.
    """
    # Positional arguments are passed through in the exact order that
    # DelimitedList declares them; the trailing-delimiter flag stays
    # keyword-only on both sides.
    shim = DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )
    return shim

1074 

1075 

# Compatibility synonyms
# Pre-PEP8 camelCase names kept for backward compatibility. Plain
# assignments are straight aliases; replaced_by_pep8(...) wraps the
# target so it can be flagged as the deprecated spelling.
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
# NOTE: this rebinds delimited_list, replacing the shim function defined
# above with the wrapped DelimitedList class.
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on