Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyparsing/helpers.py: 29%

317 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:51 +0000

1# helpers.py 

2import html.entities 

3import re 

4import sys 

5import typing 

6 

7from . import __diag__ 

8from .core import * 

9from .util import ( 

10 _bslash, 

11 _flatten, 

12 _escape_regex_range_chars, 

13 replaced_by_pep8, 

14) 

15 

16 

17# 

18# global helpers 

19# 

def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    Matches patterns of the form::

        integer expr expr expr...

    where the leading integer gives how many ``expr`` terms follow. The
    returned tokens contain only the array elements; the count token
    itself is suppressed (though any results names set on it survive).

    Pass ``int_expr`` to supply a custom counting expression; it must
    produce an integer value via a parse action. ``intExpr`` is the
    pre-PEP8 spelling of the same argument.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # count given in binary: '10' means 2 items follow
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']
    """
    counter = intExpr or int_expr
    contents = Forward()

    def _bind_count(s, l, t):
        # rebind the Forward to exactly `count` repetitions of expr
        # (re-evaluated on every match, including lookahead tries)
        nonlocal contents
        count = t[0]
        contents <<= (expr * count) if count else Empty()
        # clear list contents, but keep any named results
        del t[:]

    counter = (
        Word(nums).set_parse_action(lambda t: int(t[0]))
        if counter is None
        else counter.copy()
    )
    counter.set_name("arrayLen")
    counter.add_parse_action(_bind_count, call_during_try=True)
    return (counter + contents).set_name(f"(len) {expr}...")

78 

79 

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that matches a literal repeat of
    whatever a previous expression matched. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because the repeat is
    matched as a literal, this will also match the leading ``"1:1"``
    in ``"1:10"``; if that is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def record_matched_tokens(s, l, t):
        # rebind the repeater to exactly the token(s) just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten nested results into a flat sequence of literals
            flat = _flatten(t.as_list())
            rep << And(Literal(tok) for tok in flat)

    expr.add_parse_action(record_matched_tokens, callDuringTry=True)
    rep.set_name(f"(prev) {expr}")
    return rep

111 

112 

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # capture the tokens just matched by `expr`...
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            # ...and require the repeated match to produce exactly the same tokens
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # bug fix: original message read "found{theseTokens}" with no space
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep

147 

148 

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # merge pre-PEP8 and PEP8 spellings: either spelling can enable keyword
    # matching; either spelling can disable the regex optimization
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # `other` duplicates `cur` - drop the duplicate
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    # `other` is a longer symbol that starts with `cur`;
                    # move it in front of `cur` to keep longest-first order
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                # no conflict found for `cur` - advance to the next symbol
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            # fall through to the MatchFirst fallback below
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

273 

274 

def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`OneOrMore`, and :class:`Group`
    tokens in the proper order. The key pattern can include delimiting
    markers or punctuation, as long as they are suppressed, thereby
    leaving the significant key text. The value pattern can include
    named results, so that the :class:`Dict` results can include named
    token fields.

    Example::

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result['shape'])
        print(result.shape)      # object attribute access works too
        print(result.as_dict())
    """
    entry = Group(key + value)
    return Dict(OneOrMore(entry))

313 

314 

def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text.

    By default the wrapped expression returns a single string. Pass
    ``as_string=False`` to instead get a :class:`ParseResults` holding
    the original matched text plus any results names that were set
    inside *expr* (required if you want to keep those names).

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # either spelling set to False selects the ParseResults form
    use_string = asString and as_string

    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    matched = start_marker("_original_start") + expr + end_marker("_original_end")

    if use_string:

        def restore_original(s, l, t):
            # replace all tokens with the raw slice of the input string
            return s[t._original_start : t._original_end]

    else:

        def restore_original(s, l, t):
            # keep named results, but make the raw slice the sole token;
            # pop() consumes the internal marker names as a side effect
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matched.set_parse_action(restore_original)
    matched.ignoreExprs = expr.ignoreExprs
    matched.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matched

366 

367 

def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty. The wrapped expression's parse
    action keeps only the first token of each match.
    """
    unwrap = TokenConverter(expr)
    return unwrap.add_parse_action(lambda toks: toks[0])

373 

374 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    marker = Empty().set_parse_action(lambda s, loc, toks: loc)
    # the end marker must not skip whitespace before reporting its location
    end_marker = marker.copy().leaveWhitespace()
    body = marker("locn_start") + expr("value") + end_marker("locn_end")
    return Group(body)

408 

409 

def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile pre-PEP8 and PEP8 spellings: if the legacy ``ignoreExpr``
    # still holds its default, defer to ``ignore_expr``; otherwise the
    # explicitly-passed legacy value wins
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression from the delimiter strings
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                if ignoreExpr is not None:
                    # single-char delimiters: content is any run of characters
                    # that are not a delimiter or whitespace, scanned one
                    # character at a time so ignored expressions are skipped
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-char delimiters: each position must also be tested
                # against the opener/closer literals themselves
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # recursive grammar: a nested expression is a delimited, possibly empty
    # sequence of ignored expressions, nested expressions, or content
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret

539 

540 

def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name

    ``xml=True`` builds strict tags (case-sensitive name, double-quoted
    attribute values only); ``xml=False`` builds permissive HTML tags
    (caseless name, quoted or unquoted values, attribute names lowercased,
    value optional). Returns an ``(openTag, closeTag)`` pair.
    NOTE(review): the ``Suppress`` default arguments are evaluated once at
    import time and shared across all calls — presumably intentional since
    they are stateless suppressors; verify before changing.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # a ParserElement was passed in; use its name for the results names
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted; quotes are stripped
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: values may be quoted or bare (any printables except '>');
        # attribute names are lowercased and the '=value' part is optional
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    # convenience expression for "everything up to the matching close tag"
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

597 

598 

def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Returns a 2-tuple of ``(open_tag_expr, close_tag_expr)``.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    return _makeTags(tag_str, False)

624 

625 

def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Returns a 2-tuple of ``(open_tag_expr, close_tag_expr)``.

    Example: similar to :class:`make_html_tags`
    """
    return _makeTags(tag_str, True)

635 

636 

# expressions matching any well-formed HTML open/close tag; the tag name
# expression accepts letters followed by alphanumerics, '_' or ':'
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (with the trailing ';' stripped) to their
# replacement characters, e.g. "amp" -> "&"
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# matches any entity from the map (e.g. "&amp;"); the bare entity name is
# captured under the results name "entity"
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)

647 

648 

def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # look up the captured "entity" results name; unknown names yield None
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

652 

653 

class OpAssoc(Enum):
    """Enumeration of operator associativity (left- or right-associative)
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    LEFT = 1
    RIGHT = 2

660 

661 

# an operator for infix_notation may be given as an expression, a literal
# string, or (for ternary operators) a pair of expressions/strings
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level: (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]

678 

679 

def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    # (bug fix: the original tested rpar twice, so a non-suppressed lpar
    # combined with a suppressed rpar skipped the grouping)
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    arity: int
    rightLeftAssoc: OpAssoc  # fixed annotation (was lowercase legacy alias)
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    for i, operDef in enumerate(op_list):
        # pad a 3-tuple spec out to 4 elements; parse action defaults to None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} term"
        else:
            term_name = f"{opExpr} term"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        # consistency fix: use PEP8 set_name (original used legacy setName)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

855 

856 

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): the mutable default `backup_stacks=[]` appears intentional -
    # it persists snapshots of indentStack across calls so a failed parse can
    # restore the stack (see reset_stack) - confirm before changing.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore indentStack to the most recent snapshot (invoked as this
        # expression's fail action below).
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # Parse action: succeed only when the current column equals the current
        # indentation level; deeper is "illegal nesting", shallower fails as
        # "not a peer entry". A location at/past end-of-string is accepted.
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # Parse action: require a strictly deeper indent than the current level,
        # and push the new level onto the stack.
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # Parse action: the current column must be some previously recorded
        # indent level; pop the stack when we have stepped back out one level.
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # Newlines are consumed (suppressed) but tabs/spaces are left so that the
    # col() checks above see the true indentation column.
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # Standard case: block must open with a deeper indent and close with
        # a matching unindent.
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # Left-most statements: no INDENT required, UNDENT is optional.
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # Allow statements to be continued across lines with a trailing backslash.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

998 

999 

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# rest_of_line keeps leading whitespace (leave_whitespace) so it captures the
# remainder of the current line exactly as written.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# the (?:\\\n|[^\n]) alternative lets a // comment continue across a
# backslash-escaped newline
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
# (snapshots every ParserElement defined at module level up to this point)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

1030 

1031 

# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)"""
    # Thin forwarding shim: every argument is passed straight through to
    # DelimitedList, spelled as keywords for readability.
    return DelimitedList(
        expr,
        delim=delim,
        combine=combine,
        min=min,
        max=max,
        allow_trailing_delim=allow_trailing_delim,
    )

1046 

1047 

# pre-PEP8 compatible names
# These aliases preserve the historical camelCase public API; new code should
# prefer the snake_case / class-based names they point to.
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment

# Deprecated callables: replaced_by_pep8 supplies the implementation (delegating
# to the PEP8-named target) - the stub bodies below are intentionally empty.
@replaced_by_pep8(DelimitedList)
def delimitedList(): ...

# NOTE(review): this rebinds delimited_list, shadowing the compatibility
# function defined earlier in this module - confirm which definition is
# intended to win.
@replaced_by_pep8(DelimitedList)
def delimited_list(): ...

@replaced_by_pep8(counted_array)
def countedArray(): ...

@replaced_by_pep8(match_previous_literal)
def matchPreviousLiteral(): ...

@replaced_by_pep8(match_previous_expr)
def matchPreviousExpr(): ...

@replaced_by_pep8(one_of)
def oneOf(): ...

@replaced_by_pep8(dict_of)
def dictOf(): ...

@replaced_by_pep8(original_text_for)
def originalTextFor(): ...

@replaced_by_pep8(nested_expr)
def nestedExpr(): ...

@replaced_by_pep8(make_html_tags)
def makeHTMLTags(): ...

@replaced_by_pep8(make_xml_tags)
def makeXMLTags(): ...

@replaced_by_pep8(replace_html_entity)
def replaceHTMLEntity(): ...

@replaced_by_pep8(infix_notation)
def infixNotation(): ...
# fmt: on