Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pip/_vendor/pyparsing/helpers.py: 31%

314 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:48 +0000

1# helpers.py 

2import html.entities 

3import re 

4import typing 

5 

6from . import __diag__ 

7from .core import * 

8from .util import _bslash, _flatten, _escape_regex_range_chars 

9 

10 

11# 

12# global helpers 

13# 

def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    If ``allow_trailing_delim`` is set to True, then the list may end with
    a delimiter.

    Example::

        delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    if isinstance(expr, str_type):
        expr = ParserElement._literalStringClass(expr)

    expr_name = str(expr.copy().streamline())
    delim_name = str(delim)
    trailing_name = " [{}]".format(delim_name) if allow_trailing_delim else ""
    dlName = "{} [{} {}]...{}".format(expr_name, delim_name, expr_name, trailing_name)

    if not combine:
        delim = Suppress(delim)

    # The first element is matched outside the repetition, so the
    # (delim + expr) group repeats one fewer time than the requested count.
    rep_min, rep_max = min, max
    if rep_min is not None:
        if rep_min < 1:
            raise ValueError("min must be greater than 0")
        rep_min -= 1
    if rep_max is not None:
        if rep_min is not None and rep_max <= rep_min:
            raise ValueError("max must be greater than, or equal to min")
        rep_max -= 1

    delimited_list_expr = expr + (delim + expr)[rep_min, rep_max]

    if allow_trailing_delim:
        delimited_list_expr += Opt(delim)

    return (
        Combine(delimited_list_expr).set_name(dlName)
        if combine
        else delimited_list_expr.set_name(dlName)
    )

69 

70 

def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    # pre-PEP8 ``intExpr`` keyword wins over ``int_expr`` for backward compatibility
    intExpr = intExpr or int_expr
    # placeholder whose contents are filled in at parse time, once the count is known
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        # ``<<=`` rebinds the local name, so the closure must declare it nonlocal
        nonlocal array_expr
        n = t[0]
        # a count of 0 matches an empty array rather than failing
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # copy so the caller's expression is not mutated by the name/action below
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    # call_during_try=True so the count is captured even inside lookaheads/alternation
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name("(len) " + str(expr) + "...")

129 

130 

def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches a previous literal, will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        # rebuild the repeater from whatever literal text was just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten nested results into a sequence of literals
            flattened = _flatten(t.as_list())
            rep << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep

162 

163 

def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copy_token_to_repeater(s, l, t):
        # remember the tokens just matched; the repeater must produce the same ones
        expected = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            seen = _flatten(t.as_list())
            if seen != expected:
                raise ParseException(
                    s, l, "Expected {}, found{}".format(expected, seen)
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep

198 

199 

def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # merge pre-PEP8 synonyms with their snake_case counterparts
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    # a str passed as the second positional arg usually means the caller
    # passed multiple choice strings instead of one space-delimited string
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    # drop exact (or caseless-equal) duplicate
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    # ``cur`` is a prefix of ``other``: move the longer symbol
                    # ahead of it so longest-first matching is preserved
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = "[{}]".format(
                    "".join(_escape_regex_range_chars(sym) for sym in symbols)
                )
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = r"\b(?:{})\b".format(patt)

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

325 

326 

def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value. Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        print(attr_expr[1, ...].parse_string(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one or more grouped key/value pairs, collected into a Dict
    key_value_pair = Group(key + value)
    return Dict(key_value_pair[1, ...])

365 

366 

def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + SkipTo(closer) + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # either keyword being False disables string conversion
    asString = asString and as_string

    # markers that record the input location before and after ``expr``
    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    # do not skip leading whitespace, so the end location is exact
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            # pop the bookkeeping locations and replace the token list
            # with the original slice, preserving other named results
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr

418 

419 

def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    # unwrap the single grouped result by returning its first element
    converter = TokenConverter(expr)
    converter.add_parse_action(lambda toks: toks[0])
    return converter

425 

426 

def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the Located class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width marker whose parse action reports the current location
    loc_marker = Empty().set_parse_action(lambda ss, ll, tt: ll)
    # the end marker must not skip whitespace, or the reported end would drift
    end_marker = loc_marker.copy().leaveWhitespace()
    return Group(loc_marker("locn_start") + expr("value") + end_marker("locn_end"))

460 

461 

def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:
    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(delimited_list(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # reconcile the pre-PEP8 ``ignoreExpr`` keyword with ``ignore_expr``:
    # if they differ, prefer ``ignore_expr`` unless the caller explicitly
    # passed something other than the default for ``ignoreExpr``
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression from the delimiter characters
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            if len(opener) == 1 and len(closer) == 1:
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                # multi-character delimiters: exclude them via negative lookahead
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    # recursive definition: a nested expression may contain further nested expressions
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret

588 

589 

def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        # a ParserElement was passed; use its name for the results names
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted; tag name is case-sensitive
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            # "/" just before ">" marks a self-closing (empty) tag
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        # HTML: values may be quoted or bare; attribute names lowercased,
        # and the "=value" part is optional (boolean attributes)
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    # convenience expression for everything between the open and close tags
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

646 

647 

def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects HTML semantics (caseless tags, unquoted attr values)
    return _makeTags(tag_str, xml=False)

673 

674 

def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    # xml=True selects XML semantics (case-sensitive tags, quoted attr values)
    return _makeTags(tag_str, xml=True)

684 

685 

# pre-built expressions that match any well-formed HTML open/close tag
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# map of HTML5 entity names (without trailing ';') to their replacement characters
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)

696 

697 

def replace_html_entity(t):
    """Parse action to replace a matched HTML entity with its special character.

    Returns ``None`` (leaving the tokens unchanged) if the entity name is unknown.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

701 

702 

class OpAssoc(Enum):
    # operator associativity indicators used by infix_notation
    LEFT = 1
    RIGHT = 2

706 

707 

# an operator expression: a single element/str, or a 2-tuple of them
# (the tuple form is used for ternary operators)
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# one precedence level: (op_expr, num_operands, associativity[, parse_action])
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]

724 

725 

def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy. Operators may be unary
    or binary, left- or right-associative. Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:
    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as Suppress(lpar). If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as Suppress(rpar). If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    # BUGFIX: original tested isinstance(rpar, Suppress) twice, never
    # checking lpar, so a non-suppressed lpar failed to trigger grouping
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    for i, operDef in enumerate(op_list):
        # pad the spec tuple so the optional parse action defaults to None
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = "{}{} term".format(opExpr1, opExpr2)
        else:
            term_name = "{} term".format(opExpr)

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: Forward = Forward().set_name(term_name)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        # each level matches its own operators, or falls through to the level below
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

889 

890 

def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use IndentedBlock class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE: backup_stacks=[] is a mutable default shared across all calls —
    # here that appears deliberate: each call pushes a snapshot of the caller's
    # indentStack, restored by reset_stack() on parse failure and popped by the
    # success parse action registered below.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore the indent stack (in place, so closures see it) to the most
        # recent snapshot; invoked via the fail action when smExpr fails.
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # Parse action: a statement at the same block level must start in the
        # same column as the top of the indent stack.
        if l >= len(s):
            # at end of input — nothing to check
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # Parse action: entering a nested block requires a strictly deeper
        # column, which is pushed onto the stack.
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # Parse action: leaving a block must return to some enclosing level
        # already on the stack; pop one level if we moved left.
        if l >= len(s):
            # at end of input — nothing to check
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # Newlines are significant here: consume line ends (with tab/space
    # whitespace) but suppress them from the results.
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # Block must be indented relative to the enclosing level:
        # INDENT ... one-or-more peer statements ... UNDENT.
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # Left-most block: no INDENT required, UNDENT is optional.
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # Allow statements to span lines via backslash-newline continuation.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

1032 

1033 

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# Regex matches everything up to (but not including) the closing "*/":
# any non-'*' character, or a '*' not followed by '/'.
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

# [\s\S] matches any character including newlines; *? keeps the match minimal
# so back-to-back comments are matched separately.
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# A "//" comment may continue onto the next line via a backslash-newline.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]

1064 

1065 

# pre-PEP8 compatible names
# (camelCase aliases retained for backward compatibility with code written
# against older pyparsing releases; new code should use the snake_case names)
delimitedList = delimited_list
countedArray = counted_array
matchPreviousLiteral = match_previous_literal
matchPreviousExpr = match_previous_expr
oneOf = one_of
dictOf = dict_of
originalTextFor = original_text_for
nestedExpr = nested_expr
makeHTMLTags = make_html_tags
makeXMLTags = make_xml_tags
anyOpenTag, anyCloseTag = any_open_tag, any_close_tag
commonHTMLEntity = common_html_entity
replaceHTMLEntity = replace_html_entity
opAssoc = OpAssoc
infixNotation = infix_notation
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment