Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/helpers.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

309 statements  

1# helpers.py 

2import html.entities 

3import re 

4import sys 

5import typing 

6 

7from . import __diag__ 

8from .core import * 

9from .util import ( 

10 _bslash, 

11 _flatten, 

12 _escape_regex_range_chars, 

13 replaced_by_pep8, 

14) 

15 

16 

17# 

18# global helpers 

19# 

def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    # the pre-PEP8 keyword spelling wins when both are supplied
    intExpr = intExpr or int_expr
    # placeholder for the "expr * n" repetition; it is (re)filled at parse
    # time once the actual count value has been read
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        nonlocal array_expr
        n = t[0]
        # a zero count must still match, so substitute Empty() for expr*0
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        # default count field: a plain decimal integer
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated by the wiring below
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True: capture the count even inside lookaheads/tries
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name(f"(len) {expr}...")

78 

79 

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    repeater = Forward()

    def capture_match(s, l, t):
        # Rebuild the repeater from whatever the previous expression
        # just matched: nothing, a single literal, or a sequence of them.
        if len(t) == 0:
            repeater << Empty()
        elif len(t) == 1:
            repeater << t[0]
        else:
            flattened = _flatten(t.as_list())
            repeater << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(capture_match, callDuringTry=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater

113 

114 

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    # reparse with a copy of expr, then compare the resulting tokens
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # snapshot the tokens the first occurrence matched
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # FIX: add missing space between "found" and the token list
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        # replace (not append) the check so each new match of expr resets it
        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep

149 

150 

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # merge pre-PEP8 and PEP8 spellings: either keyword may enable the option
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a string passed as the second positional arg almost certainly means the
    # caller passed choices as separate string arguments by mistake
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    # comparison helpers and fallback element class, selected by case mode
    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            # note: enumerate iterates over a slice *copy*, so the deletions
            # below do not disturb this inner iteration
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # drop exact duplicate of the current symbol
                    del symbols[i + j + 1]
                    break
                if masks(cur, other):
                    # 'other' starts with 'cur': hoist the longer symbol in
                    # front so it is tried first
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                # no duplicate/mask found for cur; advance to next symbol
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

275 

276 

def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        print(attr_expr[1, ...].parse_string(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # each key/value pair becomes one grouped entry; Dict then turns the
    # repeated entries into named results
    entry = Group(key + value)
    return Dict(OneOrMore(entry))

315 

316 

def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # both spellings default to True; passing False for either selects
    # ParseResults mode
    asString = asString and as_string

    # zero-width markers that record the parse location before/after expr
    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    # callPreparse=False: record the end location without first advancing
    # past leading whitespace
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        # return just the raw slice of the input string
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            # replace the token list with the original slice; pop() also
            # removes the two bookkeeping names, preserving any results
            # names that expr itself defined
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    # honor the same ignore expressions as the wrapped expression
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr

368 

369 

def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    # wrap in a TokenConverter, then unwrap the single grouped token
    unwrapper = TokenConverter(expr)
    unwrapper.add_parse_action(lambda t: t[0])
    return unwrapper

375 

376 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width element whose parse action reports the current location
    locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
    start_marker = locator("locn_start")
    # leaveWhitespace so the end location is taken immediately after expr,
    # not after any following whitespace
    end_marker = locator.copy().leaveWhitespace()("locn_end")
    return Group(start_marker + expr("value") + end_marker)

410 

411 

def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile pre-PEP8 and PEP8 argument spellings
    # NOTE(review): ParserElement equality between two distinct expression
    # objects is identity-based, so `ignoreExpr == quoted_string()` (a brand
    # new instance) looks like it can never be true here - verify that a
    # caller-supplied ignore_expr actually takes effect through this branch
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # no content expression given - synthesize one from the delimiters
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                # single-char delimiters: content is any run of characters
                # that are not delimiters or whitespace
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-char delimiters: consume one character at a time,
                # with negative lookahead for the delimiter strings
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # the recursive grammar: opener, then any mix of ignored text, nested
    # lists, or content, then closer
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name(f"nested {opener}{closer} expression")
    # don't override error message from content expressions
    ret.errmsg = None
    return ret

543 

544 

def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name

    ``xml=True`` builds strict rules (case-sensitive tag name, double-quoted
    attribute values only); ``xml=False`` builds lenient HTML rules
    (caseless tag name, quoted or unquoted attribute values, lowercased
    attribute names). Returns an ``(openTag, closeTag)`` pair.

    The ``suppress_LT``/``suppress_GT`` defaults are evaluated once at
    definition time (standard default-argument behavior) and shared by all
    calls.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        # HTML tags match caselessly; XML tags match exact case
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # a ParserElement was passed in; use its name for results names
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: quoted or unquoted attribute values; '=' and value optional
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        # normalize HTML attribute names to lowercase
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    # adjacent=False allows whitespace between "</", tag name, and ">"
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name(f"<{resname}>")
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name(f"</{resname}>")
    # expose the tag name and a convenience SkipTo body expression
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

601 

602 

def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # delegate to the shared builder in lenient (non-XML) mode
    open_tag, close_tag = _makeTags(tag_str, False)
    return open_tag, close_tag

628 

629 

def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # delegate to the shared builder in strict (XML) mode
    open_tag, close_tag = _makeTags(tag_str, True)
    return open_tag, close_tag

639 

640 

# ready-made expressions matching any HTML tag: the tag name is a word of
# alphas followed by alphanums/"_:" characters
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# entity-name (without trailing ';') -> replacement character, built from the
# stdlib html.entities.html5 table
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# matches any entity from the map, capturing its name in the "entity" group
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)

651 

652 

def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # look up the captured entity name; returns None (no replacement) if absent
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

656 

657 

class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    # operator groups left-to-right: a op b op c -> (a op b) op c
    LEFT = 1
    # operator groups right-to-left: a op b op c -> a op (b op c)
    RIGHT = 2

664 

665 

# type of the operator element in an infix_notation operator spec: a single
# expression or string, or (for ternary operators) a pair of them
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level for infix_notation:
# (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]

682 

683 

def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    # normalize string parens to suppressed literals
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar).set_name(
            f"nested_{base_expr.name}"
        )
    else:
        lastExpr = base_expr | (lpar + ret + rpar).set_name(f"nested_{base_expr.name}")
    root_expr = lastExpr

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    # build each precedence level on top of the previous (lastExpr)
    for operDef in op_list:
        # pad 3-tuples with a None parse action
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} term"
        else:
            term_name = f"{opExpr} term"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                # postfix unary: operand followed by one or more operators
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    # implicit (juxtaposition) binary operator
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    # right-assoc recursion: the right operand is thisExpr
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        # fall back to the lower-precedence expression when no operator matches
        thisExpr <<= (matchExpr | lastExpr).setName(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    root_expr.set_name("base_expr")
    return ret

863 

864 

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): ``backup_stacks=[]`` is a mutable default shared across
    # every call that does not pass its own list. The parse/fail actions
    # below append/pop/restore through it, so multiple indentedBlock
    # expressions end up coordinating through one shared backup list —
    # presumably intentional for this deprecated helper; confirm before
    # "fixing" it to a None-default.

    # Snapshot the caller's indent stack so a failed parse can restore it.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Roll the shared indent stack back to the most recent snapshot.
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # At end of input there is nothing to check.
        if l >= len(s):
            return
        curCol = col(l, s)
        # A peer statement must start at exactly the current indent level.
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        # A sub-block must be indented strictly deeper; push the new level.
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # At end of input, treat the block as implicitly closed.
        if l >= len(s):
            return
        curCol = col(l, s)
        # An unindent must land on some previously seen indent level.
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        # Pop back out one level when the column is shallower than current.
        if curCol < indentStack[-1]:
            indentStack.pop()

    # Newlines (suppressed); INDENT/PEER/UNDENT are zero-width markers that
    # exist only to fire the column-checking parse actions above.
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # Standard case: block must be indented beyond the current level.
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # Left-most statements: no INDENT required, UNDENT optional.
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    # On failure, restore the indent stack from the saved snapshot.
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # Allow backslash line-continuations inside the block's statements.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

1006 

1007 

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# leave_whitespace() so the match starts at the current position, including
# any leading spaces/tabs on the rest of the line
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# the (?:\\\n|[^\n]) alternation lets a // comment continue across a
# backslash-escaped newline
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# the /* ... */ regex is repeated (rather than reusing c_style_comment)
# so this Combine builds its own copy of the expression
cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

1038 

1039 

# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)"""
    # Pure forwarding shim: every argument is handed straight through to
    # the DelimitedList class, which contains the actual implementation.
    return DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )

1054 

1055 

# Compatibility synonyms
# fmt: off
# Plain aliases: the legacy camelCase name is bound to the very same object
# as its snake_case counterpart.
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
# Wrapped aliases: replaced_by_pep8 wraps the PEP 8 implementation under the
# legacy name. Note that the next two lines rebind both delimitedList and
# delimited_list (shadowing the delimited_list function defined above) to
# wrappers around the DelimitedList class.
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on